diff --git a/.github/workflows/latex.yml b/.github/workflows/latex.yml
index 2b40879..a8b5c9f 100644
--- a/.github/workflows/latex.yml
+++ b/.github/workflows/latex.yml
@@ -19,10 +19,56 @@ jobs:
         with:
           root_file: main.tex
           working_directory: paper/src
-          args: -pdf -interaction=nonstopmode -file-line-error -outdir=../build
+          args: -pdf -f -interaction=nonstopmode -file-line-error -outdir=../build
           pre_compile: bash ../concat_code.sh
       - name: Upload PDF
         uses: actions/upload-artifact@v4
         with:
           name: thesis-pdf
           path: paper/build/main.pdf
+
+      - name: Get current date
+        id: date  # read by the upload step as steps.date.outputs.date
+        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
+
+      - name: Upload to Cloudflare R2
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
+          AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT }}
+          DATE: ${{ steps.date.outputs.date }}
+          BUCKET_NAME: ${{ secrets.R2_BUCKET_NAME }}
+        run: |
+          pip install boto3
+          python3 << 'EOF'
+          import boto3
+          import os
+          # Endpoint, credentials and bucket all come from the step's env block.
+          s3 = boto3.client('s3',
+              endpoint_url=os.environ['AWS_ENDPOINT_URL'],
+              aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
+              aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY']
+          )
+
+          date = os.environ['DATE']
+          bucket = os.environ['BUCKET_NAME']
+          pdf_path = 'paper/build/main.pdf'
+          # Dated snapshot: the key embeds DATE, so a rerun on the same day reuses it.
+          dated_filename = f"thesis-{date}.pdf"
+          s3.upload_file(
+              pdf_path,
+              bucket,
+              dated_filename,
+              ExtraArgs={'ContentType': 'application/pdf'}
+          )
+          print(f"Uploaded {dated_filename}")
+
+          # Fixed key, rewritten on every run, for links that should not change.
+          s3.upload_file(
+              pdf_path,
+              bucket,
+              'thesis-latest.pdf',
+              ExtraArgs={'ContentType': 'application/pdf'}
+          )
+          print("Uploaded thesis-latest.pdf")
+          EOF
diff --git a/.gitignore b/.gitignore
index 733e405..9db7742 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,12 @@ paper/src/bib/auto
 experiments/airflow/logs/*
 experiments/airflow/logs/scheduler/
 experiments/airflow/logs/dag_processor_manager/
+experiments/collected_data/*
+
+paper/src/auto/*
+lib/
+docs/goals/*.md
+PHANTOM.wiki/
 tests/e2e/node_modules/**
 **/auto/*.el
 *.old
diff --git a/Makefile b/Makefile
index d2d2d7f..0c51bb3 100644
--- a/Makefile
+++ b/Makefile
@@ -22,14 +22,15 @@ $(BUILDDIR):
 pdf.build: $(BUILDDIR)
 	@bash paper/concat_code.sh
 	@cd $(SRCDIR) && \
-	$(LATEXMK) -pdf -jobname=$(JOBNAME) \
+	$(LATEXMK) -pdf -jobname=$(JOBNAME) -f \
 		-interaction=nonstopmode -file-line-error \
+		-r ../.latexmkrc \
 		-outdir=../$(BUILDDIR) $(TEX)
 
 .PHONY: pdf.watch
 pdf.watch: $(BUILDDIR)
 	@cd $(SRCDIR) && \
-	$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) \
+	$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \
 		-interaction=nonstopmode -file-line-error \
 		-r ../.latexmkrc \
 		-outdir=../$(BUILDDIR) $(TEX)
@@ -72,6 +73,18 @@ stats.lines:
 	@find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \
 	\( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l
 
+# Total word count of the main chapters only (appendix files are not listed).
+.PHONY: wordcount
+wordcount:
+	@echo "Counting words in main text (excluding appendix)..."
+	@texcount -nosub -total -sum -1 \
+		$(SRCDIR)/chapters/01-intro.tex \
+		$(SRCDIR)/chapters/02-literature-review.tex \
+		$(SRCDIR)/chapters/03-methodology.tex \
+		$(SRCDIR)/chapters/04-results.tex \
+		$(SRCDIR)/chapters/05-discussion.tex \
+		$(SRCDIR)/chapters/06-conclusion.tex
+
 .PHONY: pdf clean watch run.webapp test count-lines all
 pdf: pdf.build
 clean: pdf.clean
@@ -79,4 +92,4 @@ watch: pdf.watch
 run.webapp: web.dev
 test: test.backend
 count-lines: stats.lines
-all: pdf.build
+all: pdf.build
\ No newline at end of file
diff --git a/README.md b/README.md
index 1126458..17a8c45 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,92 @@
 ### PHANTOM
 
 [![Build PDF](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml/badge.svg)](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml)
+[![Paper](https://img.shields.io/badge/Paper-PDF-red?logo=adobe-acrobat-reader)](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
 [![TPU Research Cloud](https://img.shields.io/badge/TPU%20Research%20Cloud-TRC%20supported-4285F4?logo=googlecloud&logoColor=white)](https://sites.research.google/trc/faq/)
 [![Vercel Deploy](https://deploy-badge.vercel.app/?url=https://phantom-hotel.vercel.app&name=Hotel)](https://phantom-hotel.vercel.app)
 [![Vercel Deploy](https://deploy-badge.vercel.app/?url=https://phantom-airline.vercel.app&name=Airline)](https://phantom-airline.vercel.app)
 
 
 
-
+```mermaid
+mindmap
+  PHANTOM((PHANTOM Project))
+    North Star
+      Study how automated actors change markets 
+      Build an experimentation platform for real-world-like commerce 
+      Two-loop learning system
+        Online observation loop 
+        Offline "defense gym" loop 
+    Core Economic Questions
+      Price Discovery
+        How prices respond to demand signals
+        How signal quality changes with bots/agents
+      Demand & Elasticity
+        Shifts in willingness-to-pay
+        Short-run vs long-run elasticity
+      Market Efficiency & Welfare
+        Consumer surplus vs producer surplus
+        Deadweight loss from frictions/manipulation
+      Price Discrimination & Segmentation
+        Behavioral feature-based segmentation
+        Fairness vs profitability tradeoffs
+      Information Asymmetry
+        Agents amplify search and arbitrage
+        Sellers infer more about buyers; buyers infer more about sellers
+      Strategic Interaction
+        Consumers vs firms vs agents
+        Feedback loops: policy ↔ behavior ↔ price
+      Market Power & Competition
+        Algorithmic pricing as competitive tool
+        Risks: tacit coordination / "algorithmic collusion"
+      Externalities
+        Congestion and attention costs
+        Spillovers: one segment’s behavior affects others’ prices
+    System-Level View
+      Participants
+        Humans
+        Agents (automated buyers/actors) 
+        Firms (pricing decision-makers)
+        Platform (measurement + control layer)
+      Markets Simulated
+        Repeated transactions
+        Limited inventory / capacity constraints (conceptually)
+        Time dynamics (learning over time)
+      Interventions
+        Pricing policies
+        Experiment assignment / randomized exposure
+        Agent behavioral policies (task-driven)
+    Measurement & Causal Inference
+      What is observed
+        Actions (search, click, purchase intent)
+        Context (product attributes, time, exposure)
+        Outcomes (conversion, revenue, churn proxies)
+      Identification strategy
+        A/B tests and randomization
+        Counterfactual baselines
+        Robustness checks (offline replay)
+      Key metrics
+        Revenue / profit proxies
+        Conversion & bounce
+        Price volatility / stability
+        Welfare proxies (e.g., dispersion, access)
+    Risk, Governance, and Ethics
+      Manipulation & Integrity
+        Bot-driven demand distortion
+        Measurement contamination
+      Fairness & Transparency
+        Differential pricing concerns
+        Explainability and auditability
+      Safety Constraints
+        Guardrails on price moves
+        Monitoring for runaway feedback loops
+    Outputs
+      Insights
+        When do agents raise/lower prices via behavior shifts?
+        Which market designs are robust to automation?
+      Defenses
+        Agent-aware pricing policies (robust control)
+        Detection + mitigation strategies (feature-level separability)
+      Platform Value
+        Reusable testbed for market + AI-agent research
+```
diff --git a/docker-compose.yml b/docker-compose.yml
index f572758..f72f415 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,8 +1,17 @@
 services:
-
-  tensorboard:
+  tensorboard-rl:
     image: tensorflow/tensorflow:latest
-    container_name: "PHANTOM-tensorboard"
+    container_name: "PHANTOM-tensorboard-rl"
+    ports:
+      - "6007:6006"
+    volumes:
+      - ./sim/rl/runs:/logs
+    command: tensorboard --logdir=/logs --host=0.0.0.0 --port=6006
+    restart: unless-stopped
+
+  tensorboard-ml:
+    image: tensorflow/tensorflow:latest
+    container_name: "PHANTOM-tensorboard-ml"
     ports:
       - "6006:6006"
     volumes:
diff --git a/docs/goals/goals.csv b/docs/goals/goals.csv
new file mode 100644
index 0000000..b999fc4
--- /dev/null
+++ b/docs/goals/goals.csv
@@ -0,0 +1,21 @@
+store_mode,task_name,task_description,definition_of_done
+airline,The Indecisive Executive (SEA-LAX),"You are traveling SEA to LAX for business. You prefer Business Class for the comfort, but you need to justify the expense to your company. 1) Find the Business Class option and check its price. 2) Compare it against the Economy option on the same route to see how much money you are saving or spending. 3) Spend some time weighing the pros and cons of the ""Flexible"" fare rule vs the standard one. 4) Ultimately, decide that your comfort is worth it and book the Business Class ticket.","Booking for SEA-LAX Business Class is completed."
+airline,The Cross-Country Splurge (LAX-JFK),"You are flying LAX to JFK and want to treat yourself to First Class, but only if it's the right flight. 1) Find the First Class option. 2) Thoroughly check the details (duration, arrival time). 3) Compare it with the Business Class option if available, or just look at other departure times to ensure this is the best schedule. 4) After confirming this is the absolute best option, proceed to book First Class.","Booking for LAX-JFK First Class is completed."
+airline,The Budget Student (DFW-ORD),"You are a broke student flying DFW to ORD. You have a budget of roughly $200. 1) Find the cheapest Economy flight. 2) Before booking, frantically check if there are any other flights or if the ""Premium"" economy is somehow cheaper (it won't be, but you should check). 3) Hesitate for a moment to consider if you should just drive instead. 4) Resign yourself to the flight and book the Economy ticket.","Booking for DFW-ORD Economy Class is completed."
+airline,The Quick Hop Commuter (LAX-SFO),"You need to get from LAX to SFO as fast as possible. Price is secondary to speed. 1) Search for flights and identify the one with the shortest duration (1h 30m). 2) Click into the details to verify the arrival time fits your schedule. 3) Briefly explore if there's a Business Class upgrade available for this short flight. 4) Decide to stick with Economy since it's such a short trip and book it.","Booking for LAX-SFO is completed."
+airline,The Status Chaser (SFO-SEA),"You are trying to earn airline points and need a ""Premium"" class ticket specifically. 1) Search SFO to SEA. 2) Filter or look for the Premium Economy option. 3) Compare the price gap between Premium and Standard Economy. 4) Browse the details to see if the ""Premium"" fare includes better baggage allowance. 5) Conclude it's worth the points and book the Premium seat.","Booking for SFO-SEA Premium Economy is completed."
+airline,The Family Reunion (MIA-ATL),"You are booking for a family of 4 (2 adults, 2 children) flying MIA to ATL. 1) Search for 4 passengers. 2) You prefer Premium, but if the total is too high, you might settle for Economy. 3) Add Premium to your cart, look at the total, and hesitate. 4) Go back and check the Economy price for 4 people. 5) Decide to treat your family and go back to book the Premium option.","Booking for MIA-ATL (Premium) is completed."
+airline,The Red Eye Skeptic (LAX-JFK),"You need to fly LAX to JFK but hate late arrivals. 1) Search for the flight and check the arrival time of the First Class option. 2) It arrives early morning (02:15), which worries you. 3) Spend some time looking for other flight options on different days to see if there's a better schedule. 4) Realize this is the only direct option that works and proceed to book it despite the time.","Booking for LAX-JFK is completed."
+airline,The Refundable Requirement (ATL-DFW),"Your meeting in Dallas might get cancelled, so you strictly need a ""Refundable"" ticket. 1) Search ATL to DFW. 2) Find the First Class option and verify it lists ""Refundable"". 3) Check the Economy option to see if it is also refundable (it might not be). 4) Weigh the cost difference. 5) Choose the First Class Refundable option for peace of mind.","Booking for ATL-DFW First Class is completed."
+airline,The Hub Connector (ORD-MIA),"You are flying ORD to MIA to catch a cruise. You cannot be late. 1) Search for the flight. 2) Verify the ""stops"" is 0 (Direct). 3) Click into details to check the duration. 4) Worry that 3h 30m might be too long in Economy. 5) Look for a Business class option. 6) Decide to save money for the cruise and book Economy.","Booking for ORD-MIA Economy is completed."
+airline,The West Coast Hopper (SEA-LAX Business),"You fly this route often and usually pay around $700. 1) Search SEA to LAX. 2) Find the Business Class ticket. 3) Check if the price is near your usual $700 or if it's surged. 4) If it looks expensive, browse other dates to compare. 5) Return to your original desired date and book the Business Class seat.","Booking for SEA-LAX Business is completed."
+hotel,The Honeymoon Suite (Presidential),"It is your honeymoon. You want the best room available, specifically one with a ""jacuzzi"". 1) Search for a room for 2 people. 2) Identify the ""Presidential Suite"". 3) Click details to confirm the amenities include a jacuzzi. 4) Browse the ""Executive Suite"" just to see what you are upgrading from. 5) Go back to the Presidential Suite, confirm it's the one you want, and book it.","Booking for the Presidential Suite is completed."
+hotel,The Digital Nomad (Executive),"You are working remotely and strictly need a ""workspace"". 1) Search for a room. 2) Check the ""Executive Suite"" details for a workspace. 3) Check the ""Deluxe Room"" to see if it also has a workspace and is cheaper. 4) Compare the images (if available) or amenity lists of both. 5) Decide the Executive Suite looks more comfortable for a week of work and book it.","Booking for the Executive Suite is completed."
+hotel,The Safety First (Superior),"You are traveling with valuables and need a ""safe"" in the room. 1) Search for a room. 2) Look at the ""Standard Room"" amenities. Does it have a safe? 3) Look at the ""Superior Room"". Verify it has a safe. 4) Compare the price difference. Is safety worth the extra cost? 5) Decide it is, and book the Superior Room.","Booking for the Superior Room is completed."
+hotel,The Bachelor Party (Max Occupancy),"You are booking for 4 guys. You want everyone in one room if possible. 1) Search for 4 adults. 2) Find the room that fits 4 people (Presidential). 3) It looks expensive. Go back and search for 2 adults to see the price of a ""Standard Room"". 4) Calculate if booking two Standard Rooms is cheaper than one Presidential. 5) Decide it's too much hassle to manage two bookings and book the Presidential Suite.","Booking for the Presidential Suite is completed."
+hotel,The Budget Refundable (Junior),"You want a cheap room but your dates might change, so it MUST be refundable. 1) Search for a room. 2) Sort by price or find the cheapest options. 3) Check the ""Standard"" and ""Superior"" rooms. Notice they are likely Non-Refundable. 4) Find the ""Junior Suite"" which is Refundable. 5) Grumble about the price difference but book the Junior Suite because you need the flexibility.","Booking for the Junior Suite is completed."
+hotel,The View Hunter (Executive),"You want a room with a ""city_view"" or balcony. 1) Search for a room. 2) Check the amenities of the ""Deluxe Room"". 3) Check the amenities of the ""Executive Suite"". 4) Compare the prices. 5) Decide to treat yourself to the Executive Suite for the better view/balcony and book it.","Booking for the Executive Suite is completed."
+hotel,The Just-A-Bed (Standard),"You just need a place to crash. Lowest price wins. 1) Search for a room. 2) Identify the absolute cheapest option (Standard Room). 3) Click details just to make sure it has ""wifi"". 4) Briefly glance at the ""Superior Room"" to see if the upgrade is <$10. 5) If not, go back and book the Standard Room immediately.","Booking for the Standard Room is completed."
+hotel,The Family Vacation (Deluxe),"You are traveling with a child. You need a room that isn't too cramped but not a suite. 1) Search for 2 adults, 1 child. 2) Look at the ""Deluxe Room"". 3) Check the amenities for ""coffee_maker"" (parents need coffee). 4) Compare it with the ""Junior Suite"". 5) Decide the Deluxe Room is sufficient value and book it.","Booking for the Deluxe Room is completed."
+hotel,The Long Stay (Junior),"You are staying for 7 nights. You want something nicer than a standard room but affordable. 1) Search for a room. 2) Look at the ""Junior Suite"". 3) Check the amenities for a ""mini_fridge"" or similar. 4) Compare the total cost for 7 nights against your budget. 5) Hesitate and look at the ""Standard Room"" price. 6) Decide the extra space of the Junior Suite is worth it for a long stay and book it.","Booking for the Junior Suite is completed."
+hotel,The Last Minute Panic (Superior),"It's late and you need a room for tonight. 1) Search for a room for 1 person. 2) You recognize the ""Superior Room"" brand. 3) Click it. 4) Quickly verify check-in times or details. 5) Don't overthink it—book the Superior Room as fast as possible.","Booking for the Superior Room is completed."
diff --git a/docs/index.html b/docs/index.html
index f190154..a3f587b 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -47,7 +47,7 @@
   <meta name="citation_author" content="Rösel, Daniel">
   <meta name="citation_publication_date" content="2025">
   <meta name="citation_conference_title" content="IE University Bachelor's Thesis">
-  <meta name="citation_pdf_url" content="TODO">
+  <meta name="citation_pdf_url" content="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf">
 
   <!-- Additional SEO -->
   <meta name="theme-color" content="#2563eb">
@@ -233,14 +233,13 @@
 
                   <div class="is-size-5 publication-authors">
                     <span class="author-block">IE University<br>Bachelor's Thesis 2025</span>
-                    <span class="eql-cntrb"><small><br>Advisor: <a href="SECOND AUTHOR PERSONAL LINK" target="_blank">Alberto Martín Izquierdo</a></small></span>
+                    <span class="eql-cntrb"><small><br>Advisor: Alberto Martín Izquierdo</small></span>
                   </div>
 
                   <div class="column has-text-centered">
                     <div class="publication-links">
-                         <!-- TODO: Update with your arXiv paper ID -->
                       <span class="link-block">
-                        <a href="https://arxiv.org/pdf/<ARXIV PAPER ID>.pdf" target="_blank"
+                        <a href="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf" target="_blank"
                         class="external-link button is-normal is-rounded is-dark">
                         <span class="icon">
                           <i class="fas fa-file-pdf"></i>
@@ -315,7 +314,10 @@
         <h2 class="title is-3">Abstract</h2>
         <div class="content has-text-justified">
           <p>
-            The primary objective of this thesis is to develop and validate pricing heuristics that protect e-commerce platforms from systematic exploitation by Large Language Model (LLM) agents within dynamic pricing environments. As AI agents increasingly mediate consumer transactions, they enable users to circumvent the Cost of Information (the price premium accumulated through demand signal expression) by conducting reconnaissance in isolated sessions before executing purchases through clean sessions at base prices. This research will make an anticipatory contribution by adapting recommendation system methodologies to distinguish between genuine human browsing behaviour and agent-orchestrated information gathering, thereby enabling pricing systems to maintain margin integrity without degrading the user experience for legitimate customers or getting rid of leads generated by LLMs.
+            This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing ground for capturing the behavioral essence of these transactors across a wide variety of digital systems, construction of a discriminative model to prove separability as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned separability into existing dynamic pricing machine learning loops, and establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
+          </p>
+          <p>
+            This work develops behavioral signature models using recommendation system techniques to profile session-level interaction, temporal engagement, and cross-session correlation. The AI Agent market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030, raising the question of how these systems should be designed for future robustness and how to maintain a competitive edge in the analytical components of e-commerce platforms.
           </p>
         </div>
       </div>
@@ -433,8 +435,7 @@
     <div class="container">
       <h2 class="title">Poster</h2>
 
-      <!-- TODO: Replace with your poster PDF -->
-      <iframe  src="static/pdfs/sample.pdf" width="100%" height="550">
+      <iframe  src="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf" width="100%" height="550">
           </iframe>
 
       </div>
diff --git a/paper/.latexmkrc b/paper/.latexmkrc
index d614114..38c50d1 100644
--- a/paper/.latexmkrc
+++ b/paper/.latexmkrc
@@ -1,8 +1,6 @@
 $pdf_mode = 1;
 $pdflatex = 'pdflatex -synctex=1 -interaction=nonstopmode -file-line-error %O %S';
-$aux_dir = 'build';
-$out_dir = 'build';
-$use_biber = 0;                        # force bibtex
+$bibtex_use = 2;                       # run bibtex when needed
 $bibtex   = 'bibtex %O %B';
 $pdf_previewer = 'zathura %O %S';
 $clean_ext = 'synctex.gz bbl bcf run.xml fls fdb_latexmk glg glo gls ist blg lof lot out toc';
diff --git a/paper/concat_code.sh b/paper/concat_code.sh
index 3ff905d..7de4bb3 100755
--- a/paper/concat_code.sh
+++ b/paper/concat_code.sh
@@ -43,22 +43,22 @@ EOF
 echo "Concatenating code from source directories..."
 
 # Backend
-find "$PROJECT_ROOT/backend" -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/backend" -type d \( -name ".venv" -o -name "__pycache__" -o -name "*.egg-info" -o -name "node_modules" -o -name ".pytest_cache" \) -prune -o -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) ! -name "*.pyc" ! -name "*.pyo" -print | sort | while IFS= read -r file; do  # IFS= keeps leading/trailing whitespace in paths
     add_file "$file"
 done
 
 # Experiments
-find "$PROJECT_ROOT/experiments" -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/experiments" -type d \( -name ".venv" -o -name "__pycache__" -o -name "*.egg-info" -o -name "node_modules" -o -name ".pytest_cache" -o -name ".ipynb_checkpoints" \) -prune -o -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) ! -name "*.pyc" ! -name "*.pyo" -print | sort | while IFS= read -r file; do  # IFS= keeps leading/trailing whitespace in paths
     add_file "$file"
 done
 
 # Docker
-find "$PROJECT_ROOT/docker" -type f \( -name "*.py" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" -o -name "Dockerfile*" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/docker" -type d \( -name ".venv" -o -name "__pycache__" -o -name "node_modules" \) -prune -o -type f \( -name "*.py" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" -o -name "Dockerfile*" \) ! -name "*.pyc" ! -name "*.pyo" -print | sort | while IFS= read -r file; do  # IFS= keeps leading/trailing whitespace in paths
     add_file "$file"
 done
 
 # Web/src
-find "$PROJECT_ROOT/web/src" -type f \( -name "*.js" -o -name "*.jsx" -o -name "*.ts" -o -name "*.tsx" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/web/src" -type d \( -name "node_modules" -o -name ".next" -o -name "dist" -o -name "build" \) -prune -o -type f \( -name "*.js" -o -name "*.jsx" -o -name "*.ts" -o -name "*.tsx" \) -print | sort | while IFS= read -r file; do  # IFS= keeps leading/trailing whitespace in paths
     add_file "$file"
 done
 
diff --git a/paper/src/auto/main.el b/paper/src/auto/main.el
index 86386e4..6738458 100644
--- a/paper/src/auto/main.el
+++ b/paper/src/auto/main.el
@@ -6,7 +6,7 @@
    (setq TeX-command-extra-options
          "-file-line-error -interaction=nonstopmode")
    (TeX-add-to-alist 'LaTeX-provided-class-options
-                     '(("report" "12pt") ("article" "12pt") ("acmart" "sigconf" "nonacm" "natbib=false")))
+                     '(("report" "12pt") ("acmart" "sigconf" "nonacm" "natbib=false" "manuscript") ("article" "12pt" "letterpaper")))
    (TeX-run-style-hooks
     "latex2e"
     "preamble"
@@ -17,8 +17,7 @@
     "chapters/05-discussion"
     "chapters/06-conclusion"
     "../build/concatenated_code"
-    "acmart"
-    "acmart10")
-   (TeX-add-symbols
-    '("footnotetextcopyrightpermission" 1)))
+    "article"
+    "art12"))
  :latex)
+
diff --git a/paper/src/bib/references.bib b/paper/src/bib/references.bib
index e69de29..1130453 100644
--- a/paper/src/bib/references.bib
+++ b/paper/src/bib/references.bib
@@ -0,0 +1,425 @@
+
+@article{arnoud_v_den_boer_dynamic_2015,
+	title = {Dynamic pricing and learning: {Historical} origins, current research, and new directions},
+	volume = {20},
+	url = {https://www.sciencedirect.com/science/article/pii/S1876735415000021},
+	doi = {10.1016/j.sorms.2015.03.001},
+	number = {1},
+	journal = {Surveys in Operations Research and Management Science},
+	author = {den Boer, Arnoud V.},
+	month = jun,
+	year = {2015},
+	pages = {1--18},
+	file = {PDF:/home/velocitatem/Zotero/storage/NUAGDYER/memo2025.pdf:application/pdf},
+}
+
+@article{iliou_detection_2021,
+	title = {Detection of {Advanced} {Web} {Bots} by {Combining} {Web} {Logs} with {Mouse} {Behavioural} {Biometrics}},
+	volume = {2},
+	url = {https://dl.acm.org/doi/10.1145/3447815},
+	doi = {10.1145/3447815},
+	number = {3},
+	journal = {Digital Threats: Research and Practice},
+	author = {Iliou, Christos and Kostoulas, Theodoros and Tsikrika, Theodora and Katos, Vasilis and Vrochidis, Stefanos and Kompatsiaris, Ioannis},
+	year = {2021},
+	pages = {1--26},
+	file = {PDF:/home/velocitatem/Zotero/storage/Q7J5EBEJ/3447815.pdf:application/pdf},
+}
+
+@phdthesis{salassa_politecnico_nodate,
+	title = {Algorithmic {Pricing} in the digital age: {Ethical} considerations on its economic and social implications, and an analysis of possible solutions to overcome its critical issues},
+	abstract = {Algorithmic pricing is an emerging business practice that uses computational algorithms to determine
+the prices of products and services based on a number of dynamic factors. The aim of this thesis is to
+draw attention to the existence of these business practices, and the ethical and social implications that
+derive from them, and then focus on what could be effective solutions to increase the well-being of
+the community.
+In Chapter 2 of the thesis, a general introduction to the topic will be made, starting from its history
+and its evolution over the years; Chapter 3 will examine the different types of pricing algorithms.
+Subsequently, in Chapter 4 we will analyze the sectors in which they are most applicable, and the
+relative advantages and disadvantages they bring with them, with a critical analysis of the trade-offs
+generated. The effect of algorithmic pricing on competition will be studied, considering how the
+ability of algorithms to adapt quickly to market conditions can foster anti-competitive practices, such
+as price discrimination. Later, in Chapter 5, we will look at the issue of price transparency and how
+the opacity of algorithms can make it difficult for consumers to understand the pricing process and
+assess whether they are receiving fair treatment.
+To address these ethical issues, several possible solutions will be brought to light, described in
+Chapter 6, which will focus on the role of the government, as a regulatory, of the end consumer, who
+must be encouraged to educate and inform himself about the use of these practices, and of the
+company, as responsible for making its customers aware and acting in compliance with government
+laws, for fair and non-discriminatory use.},
+	urldate = {2025-11-12},
+	school = {Politecnico di Torino},
+	author = {Salassa, Fabio and Pautassi, Paolo},
+	file = {PDF:/home/velocitatem/Zotero/storage/L95WYQ8B/m-api-06aad998-d926-0d59-5593-82fdce5a678b.pdf:application/pdf},
+}
+
+@inproceedings{mueller_low-rank_2019,
+	title = {Low-{Rank} {Bandit} {Methods} for {High}-{Dimensional} {Dynamic} {Pricing}},
+	booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 32 ({NeurIPS} 2019)},
+	author = {Mueller, Jonas W and Syrgkanis, Vasilis and Taddy, Matt},
+	year = {2019},
+	pages = {15442--15452},
+	file = {PDF:/home/velocitatem/Zotero/storage/IZD3C5SR/m-api-26f6207c-cc89-4aed-29b6-34629f18fe9b.pdf:application/pdf},
+}
+
+@article{shahidi_coasean_nodate,
+	title = {The {Coasean} {Singularity}? {Demand}, {Supply}, and {Market} {Design} with {AI} {Agents}},
+	abstract = {AI agents—autonomous systems that perceive, reason, and act on behalf of human principals—are poised to transform digital markets by dramatically reducing transaction costs. This chapter evaluates the economic implications of this transition, adopting a consumer-oriented view of agents as market participants that can search, negotiate, and transact directly. From the demand side, agent adoption reflects derived demand: users trade off decision quality against effort reduction, with outcomes mediated by agent capability and task context. On the supply side, firms will design, integrate, and monetize agents, with outcomes hinging on whether agents operate within or across platforms. At the market level, agents create efficiency gains from lower search, communication, and contracting costs, but also introduce frictions such as congestion and price obfuscation. By lowering the costs of preference elicitation, contract enforcement, and identity verification, agents expand the feasible set of market designs but also raise novel regulatory challenges. While the net welfare effects remain an empirical question, the rapid onset of AI-mediated transactions presents a unique opportunity for economic research to inform real-world policy and market design.},
+	language = {en},
+	author = {Shahidi, Peyman and Rusak, Gili and Manning, Benjamin S and Fradkin, Andrey and Horton, John J},
+	file = {PDF:/home/velocitatem/Zotero/storage/TQCAPJDP/Shahidi et al. - The Coasean Singularity Demand, Supply, and Market Design with AI Agents.pdf:application/pdf},
+}
+
+@misc{byrnes_intro_2025,
+	title = {Intro to {Brain}-{Like}-{AGI} {Safety}},
+	url = {https://osf.io/fe36n_v1},
+	doi = {10.31219/osf.io/fe36n_v1},
+	abstract = {Suppose we someday build an Artificial General Intelligence (AGI) algorithm using similar principles of learning and cognition as the human brain. How would we use such an algorithm safely? I argue that this is an open technical problem, and my goal is to bring readers with no prior knowledge all the way up to the front-line of unsolved problems. Chapter 1 has background and motivation; Chapters 2-7 are on neuroscience, arguing for a picture of the brain that combines large-scale learning algorithms (e.g. in the cortex) and specific evolved reflexes (e.g. in the hypothalamus and brainstem); and Chapters 8-15 apply those neuroscience ideas to AGI safety. A major theme is the idea that the brain has something like a reinforcement learning reward function, which says that pain is bad, eating-when-hungry is good, etc. I argue that this reward function is centered around the hypothalamus and brainstem, and that all human desires—even "higher" desires for things like compassion and justice—come directly or indirectly from that innate reward function. If future programmers build brain-like AGI, they will likewise have a reward function slot in their source code, in which they can put whatever they want. If they put the wrong thing, the resulting AGI will wind up callously indifferent to human welfare. How might they avoid that? That's an open technical problem, but I will review some ideas and research directions.},
+	language = {en},
+	urldate = {2025-12-31},
+	publisher = {Open Science Framework},
+	author = {Byrnes, Steven J.},
+	month = mar,
+	year = {2025},
+	file = {PDF:/home/velocitatem/Zotero/storage/ZLJQ4DQ9/Byrnes - 2025 - Intro to Brain-Like-AGI Safety.pdf:application/pdf},
+}
+
+@article{shannon_mathematical_nodate,
+	title = {A {Mathematical} {Theory} of {Communication}},
+	language = {en},
+	author = {Shannon, C E},
+	file = {PDF:/home/velocitatem/Zotero/storage/FJRFRWK2/Shannon - A Mathematical Theory of Communication.pdf:application/pdf},
+}
+
+@misc{noauthor_order_stats_nodate,
+	title = {order\_stats},
+	file = {PDF:/home/velocitatem/Zotero/storage/D3QRGY9Z/order_stats.pdf:application/pdf},
+}
+
+@article{devine_nonlinear_nodate,
+	title = {Nonlinear {Pricing} with {Costly} {Information} {Acquisition}},
+	abstract = {This paper examines a nonlinear pricing model where the firm can choose to acquire costly information prior to offering contract menus to consumers; such as paying a consultant or investing in machine learning technologies. Information provides the firm with a signal about consumers' types, whose accuracy increases as the firm acquires larger amounts of information. We show that the firm chooses to acquire information, only if it can purchase a sufficient amount that could alter its initial prior beliefs. Relative to standard settings where firms cannot acquire information, we identify how information acquisition changes optimal contract offers, equilibrium profits, information rents, and welfare. A better-informed firm increases its expected profits, but it can also increase expected utility when the cost of information is intermediate. Our results recommend balanced online privacy laws.},
+	language = {en},
+	author = {Devine, Brett R and Munoz-Garcia, Felix},
+	file = {PDF:/home/velocitatem/Zotero/storage/GQ28KVBF/Devine and Munoz-Garcia - Nonlinear Pricing with Costly Information Acquisition.pdf:application/pdf},
+}
+
+@misc{wang_learning_2025,
+	title = {Learning {Optimal} {Distributionally} {Robust} {Stochastic} {Control} in {Continuous} {State} {Spaces}},
+	url = {http://arxiv.org/abs/2406.11281},
+	doi = {10.48550/arXiv.2406.11281},
+	abstract = {We study data-driven learning of robust stochastic control for infinite-horizon systems with potentially continuous state and action spaces. In many managerial settings–supply chains, finance, manufacturing, services, and dynamic games–the state-transition mechanism is determined by system design, while available data capture the distributional properties of the stochastic inputs from the environment. For modeling and computational tractability, a decision maker often adopts a Markov control model with i.i.d. environment inputs, which can render learned policies fragile to internal dependence or external perturbations. We introduce a distributionally robust stochastic control paradigm that promotes policy reliability by introducing adaptive adversarial perturbations to the environment input, while preserving the modeling, statistical, and computational tractability of the Markovian formulation. From a modeling perspective, we examine two adversary models–current-action-aware and current-action-unaware–leading to distinct dynamic behaviors and robust optimal policies. From a statistical learning perspective, we characterize optimal finite-sample minimax rates for uniform learning of the robust value function across a continuum of states under ambiguity sets defined by the fk-divergence and Wasserstein distance. To efficiently compute the optimal robust policies, we further propose algorithms inspired by deep reinforcement learning methodologies. Finally, we demonstrate the applicability of the framework to real managerial problems.},
+	language = {en},
+	urldate = {2025-12-29},
+	publisher = {arXiv},
+	author = {Wang, Shengbo and Meng, Jason and Si, Nian and Blanchet, Jose and Zhou, Zhengyuan},
+	month = nov,
+	year = {2025},
+	note = {arXiv:2406.11281 [stat]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
+	file = {PDF:/home/velocitatem/Zotero/storage/RQ8XDSSG/Wang et al. - 2025 - Learning Optimal Distributionally Robust Stochastic Control in Continuous State Spaces.pdf:application/pdf},
+}
+
+@misc{ie_recsim_2019,
+	title = {{RecSim}: {A} {Configurable} {Simulation} {Platform} for {Recommender} {Systems}},
+	shorttitle = {{RecSim}},
+	url = {http://arxiv.org/abs/1909.04847},
+	doi = {10.48550/arXiv.1909.04847},
+	abstract = {We propose RecSim, a configurable platform for authoring simulation environments for recommender systems (RSs) that naturally supports sequential interaction with users. RecSim allows the creation of new environments that reflect particular aspects of user behavior and item structure at a level of abstraction well-suited to pushing the limits of current reinforcement learning (RL) and RS techniques in sequential interactive recommendation problems. Environments can be easily configured that vary assumptions about: user preferences and item familiarity; user latent state and its dynamics; and choice models and other user response behavior. We outline how RecSim offers value to RL and RS researchers and practitioners, and how it can serve as a vehicle for academic-industrial collaboration.},
+	urldate = {2025-12-29},
+	publisher = {arXiv},
+	author = {Ie, Eugene and Hsu, Chih-wei and Mladenov, Martin and Jain, Vihan and Narvekar, Sanmit and Wang, Jing and Wu, Rui and Boutilier, Craig},
+	month = sep,
+	year = {2019},
+	note = {arXiv:1909.04847 [cs]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Human-Computer Interaction, Computer Science - Information Retrieval},
+	file = {Preprint PDF:/home/velocitatem/Zotero/storage/CJJI2VQF/Ie et al. - 2019 - RecSim A Configurable Simulation Platform for Recommender Systems.pdf:application/pdf;Snapshot:/home/velocitatem/Zotero/storage/8XJKJTHE/1909.html:text/html},
+}
+
+@misc{kuhn_wasserstein_2024,
+	title = {Wasserstein {Distributionally} {Robust} {Optimization}: {Theory} and {Applications} in {Machine} {Learning}},
+	shorttitle = {Wasserstein {Distributionally} {Robust} {Optimization}},
+	url = {http://arxiv.org/abs/1908.08729},
+	doi = {10.48550/arXiv.1908.08729},
+	abstract = {Many decision problems in science, engineering and economics are affected by uncertain parameters whose distribution is only indirectly observable through samples. The goal of data-driven decision-making is to learn a decision from finitely many training samples that will perform well on unseen test samples. This learning task is difficult even if all training and test samples are drawn from the same distribution—especially if the dimension of the uncertainty is large relative to the training sample size. Wasserstein distributionally robust optimization seeks data-driven decisions that perform well under the most adverse distribution within a certain Wasserstein distance from a nominal distribution constructed from the training samples. In this tutorial we will argue that this approach has many conceptual and computational benefits. Most prominently, the optimal decisions can often be computed by solving tractable convex optimization problems, and they enjoy rigorous out-of-sample and asymptotic consistency guarantees. We will also show that Wasserstein distributionally robust optimization has interesting ramifications for statistical learning and motivates new approaches for fundamental learning tasks such as classification, regression, maximum likelihood estimation or minimum mean square error estimation, among others.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Kuhn, Daniel and Esfahani, Peyman Mohajerin and Nguyen, Viet Anh and Shafieezadeh-Abadeh, Soroosh},
+	month = nov,
+	year = {2024},
+	note = {arXiv:1908.08729 [stat]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Mathematics - Optimization and Control},
+	file = {PDF:/home/velocitatem/Zotero/storage/FAWJEK6J/Kuhn et al. - 2024 - Wasserstein Distributionally Robust Optimization Theory and Applications in Machine Learning.pdf:application/pdf},
+}
+
+@misc{arunachaleswaran_learning_2025,
+	title = {Learning to {Play} {Against} {Unknown} {Opponents}},
+	url = {http://arxiv.org/abs/2412.18297},
+	doi = {10.48550/arXiv.2412.18297},
+	abstract = {We consider the problem of a learning agent who has to repeatedly play a general sum game against a strategic opponent who acts to maximize their own payoff by optimally responding against the learner’s algorithm. The learning agent knows their own payoff function, but is uncertain about the payoff of their opponent (knowing only that it is drawn from some distribution D). What learning algorithm should the agent run in order to maximize their own total utility, either in expectation or in the worst-case over D? When the learning algorithm is constrained to be a no-regret algorithm, we demonstrate how to efficiently construct an optimal learning algorithm (asymptotically achieving the optimal utility) in polynomial time for both the in-expectation and worst-case problems, independent of any other assumptions. When the learning algorithm is not constrained to no-regret, we show how to construct an ε-optimal learning algorithm (obtaining average utility within ε of the optimal utility) for both the in-expectation and worst-case problems in time polynomial in the size of the input and 1/ε, when either the size of the game or the support of D is constant. Finally, for the special case of the maximin objective, where the learner wishes to maximize their minimum payoff over all possible optimizer types, we construct a learner algorithm that runs in polynomial time in each step and guarantees convergence to the optimal learner payoff. All of these results make use of recently developed machinery that converts the analysis of learning algorithms to the study of the class of corresponding geometric objects known as menus.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Arunachaleswaran, Eshwar Ram and Collina, Natalie and Schneider, Jon},
+	month = feb,
+	year = {2025},
+	note = {arXiv:2412.18297 [cs]},
+	keywords = {Computer Science - Machine Learning, Computer Science - Computer Science and Game Theory},
+	file = {PDF:/home/velocitatem/Zotero/storage/M6V9LLCS/Arunachaleswaran et al. - 2025 - Learning to Play Against Unknown Opponents.pdf:application/pdf},
+}
+
+@misc{li_distributionally_2025,
+	title = {Distributionally {Robust} {Optimization} with {Adversarial} {Data} {Contamination}},
+	url = {http://arxiv.org/abs/2507.10718},
+	doi = {10.48550/arXiv.2507.10718},
+	abstract = {Distributionally Robust Optimization (DRO) provides a framework for decision-making under distributional uncertainty, yet its effectiveness can be compromised by outliers in the training data. This paper introduces a principled approach to simultaneously address both challenges. We focus on optimizing Wasserstein-1 DRO objectives for generalized linear models with convex Lipschitz loss functions, where an $\epsilon$-fraction of the training data is adversarially corrupted. Our primary contribution lies in a novel modeling framework that integrates robustness against training data contamination with robustness against distributional shifts, alongside an efficient algorithm inspired by robust statistics to solve the resulting optimization problem. We prove that our method achieves an estimation error of $O(\sqrt{\epsilon})$ for the true DRO objective value using only the contaminated data under the bounded covariance assumption. This work establishes the first rigorous guarantees, supported by efficient computation, for learning under the dual challenges of data contamination and distributional shifts.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Li, Shuyao and Diakonikolas, Ilias and Diakonikolas, Jelena},
+	month = nov,
+	year = {2025},
+	note = {arXiv:2507.10718 [cs]},
+	keywords = {Computer Science - Machine Learning, Mathematics - Optimization and Control, Computer Science - Data Structures and Algorithms},
+	file = {PDF:/home/velocitatem/Zotero/storage/H6AXDTLX/Li et al. - 2025 - Distributionally Robust Optimization with Adversarial Data Contamination.pdf:application/pdf},
+}
+
+@misc{karten_llm_2025,
+	title = {{LLM} {Economist}: {Large} {Population} {Models} and {Mechanism} {Design} in {Multi}-{Agent} {Generative} {Simulacra}},
+	shorttitle = {{LLM} {Economist}},
+	url = {http://arxiv.org/abs/2507.15815},
+	doi = {10.48550/arXiv.2507.15815},
+	abstract = {We present the LLM Economist, a novel framework that uses agent-based modeling to design and assess economic policies in strategic environments with hierarchical decision-making. At the lower level, bounded rational worker agents—instantiated as persona-conditioned prompts sampled from U.S. Census-calibrated income and demographic statistics—choose labor supply to maximize text-based utility functions learned in-context. At the upper level, a planner agent employs in-context reinforcement learning to propose piecewise-linear marginal tax schedules anchored to the current U.S. federal brackets. This construction endows economic simulacra with three capabilities requisite for credible fiscal experimentation: (i) optimization of heterogeneous utilities, (ii) principled generation of large, demographically realistic agent populations, and (iii) mechanism design—the ultimate nudging problem—expressed entirely in natural language. Experiments with populations of up to one hundred interacting agents show that the planner converges near Stackelberg equilibria that improve aggregate social welfare relative to Saez solutions, while a periodic, persona-level voting procedure furthers these gains under decentralized governance. These results demonstrate that large language model-based agents can jointly model, simulate, and govern complex economic systems, providing a tractable test bed for policy evaluation at the societal scale to help build better civilizations.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Karten, Seth and Li, Wenzhe and Ding, Zihan and Kleiner, Samuel and Bai, Yu and Jin, Chi},
+	month = jul,
+	year = {2025},
+	note = {arXiv:2507.15815 [cs]},
+	keywords = {Computer Science - Machine Learning, Computer Science - Multiagent Systems},
+	file = {PDF:/home/velocitatem/Zotero/storage/U7A5Q78V/Karten et al. - 2025 - LLM Economist Large Population Models and Mechanism Design in Multi-Agent Generative Simulacra.pdf:application/pdf},
+}
+
+@techreport{mullapudi_reinforcement_nodate,
+	title = {A {Reinforcement} {Learning} {Approach} to {Dynamic} {Pricing}},
+	abstract = {Dynamic pricing represents a critical strategic challenge in modern e-commerce, where firms must navigate fluctuating demand, inventory constraints, and aggressive competitor actions. Traditional static and heuristic-based pricing models often fail to capture the complex, non-linear dynamics of competitive digital markets, leading to suboptimal profitability. This paper proposes a model-free reinforcement learning (RL) framework to address this challenge. Specifically, we design, implement, and evaluate a Q-learning agent capable of learning an optimal, state-dependent pricing policy. The agent is trained and evaluated within a simulated market environment constructed from the publicly available "Retail Price Optimization" dataset from Kaggle, which provides a rich feature set including historical sales, product characteristics, seasonality, and, crucially, competitor pricing data. The problem is formulated as a Markov Decision Process (MDP), where the agent's state incorporates its price position relative to competitors, competitor price trends, and seasonal factors. The agent's performance is benchmarked against three baseline strategies: static pricing, a reactive "follow-the-leader" heuristic, and random pricing. The results demonstrate that the Q-learning agent achieves a substantial increase in total cumulative profit over the evaluation period, outperforming all baselines by learning a nuanced policy that strategically balances price adjustments in response to market conditions. This work provides a practical and reproducible blueprint for applying reinforcement learning to optimize pricing decisions in a simulated yet realistic competitive retail environment, highlighting the potential of RL to automate complex strategic decision-making.},
+	author = {Mullapudi, Pavan},
+	note = {Publication Title: International Journal on Science and Technology (IJSAT) IJSAT25049558
+Volume: 16
+Issue: 4},
+	keywords = {Index Terms: Dynamic Pricing, Markov Decision Process, Price Optimization, Q-Learning, Reinforcement Learning, Retail Analytics},
+	file = {PDF:/home/velocitatem/Zotero/storage/G95TBLF7/9558.pdf:application/pdf},
+}
+
+@techreport{roughgarden_cs364a_2013,
+	title = {{CS364A}: {Algorithmic} {Game} {Theory} {Lecture} \#5: {Revenue}-{Maximizing} {Auctions}},
+	author = {Roughgarden, Tim},
+	year = {2013},
+	file = {PDF:/home/velocitatem/Zotero/storage/C39VM7N9/l5.pdf:application/pdf},
+}
+
+@techreport{kuhn_distributionally_2025,
+	title = {Distributionally {Robust} {Optimization}},
+	abstract = {Distributionally robust optimization (DRO) studies decision problems under uncertainty where the probability distribution governing the uncertain problem parameters is itself uncertain. A key component of any DRO model is its ambiguity set, that is, a family of probability distributions consistent with any available structural or statistical information. DRO seeks decisions that perform best under the worst distribution in the ambiguity set. This worst case criterion is supported by findings in psychology and neuroscience, which indicate that many decision-makers have a low tolerance for distributional ambiguity. DRO is rooted in statistics, operations research and control theory, and recent research has uncovered its deep connections to regularization techniques and adversarial training in machine learning. This survey presents the key findings of the field in a unified and self-contained manner.},
+	author = {Kuhn, Daniel and Shafiee, Soroosh and Wiesemann, Wolfram},
+	year = {2025},
+	note = {arXiv: 2411.02549v3},
+	file = {PDF:/home/velocitatem/Zotero/storage/IXTTMD7G/full-text.pdf:application/pdf},
+}
+
+@article{parkes_economic_2015,
+	title = {Economic reasoning and artificial intelligence},
+	volume = {349},
+	issn = {10959203},
+	doi = {10.1126/science.aaa8403},
+	abstract = {The field of artificial intelligence (AI) strives to build rational agents capable of perceiving the world around them and taking actions to advance specified goals. Put another way, AI researchers aim to construct a synthetic homo economicus, the mythical perfectly rational agent of neoclassical economics. We review progress toward creating this new species of machine, machina economicus, and discuss some challenges in designing AIs that can reason effectively in economic contexts. Supposing that AI succeeds in this quest, or at least comes close enough that it is useful to think about AIs in rationalistic terms, we ask how to design the rules of interaction in multi-agent systems that come to represent an economy of AIs. Theories of normative design from economics may prove more relevant for artificial agents than human agents, with AIs that better respect idealized assumptions of rationality than people, interacting through novel rules and incentive systems quite distinct from those tailored for people.},
+	number = {6245},
+	journal = {Science},
+	author = {Parkes, David C. and Wellman, Michael P.},
+	month = jul,
+	year = {2015},
+	pmid = {26185245},
+	note = {Publisher: American Association for the Advancement of Science},
+	pages = {267--272},
+	file = {PDF:/home/velocitatem/Zotero/storage/27KLNFRU/_aiEcon.pdf:application/pdf},
+}
+
+@article{yokoo_effect_2004,
+	title = {The effect of false-name bids in combinatorial auctions: {New} fraud in internet auctions},
+	volume = {46},
+	issn = {08998256},
+	doi = {10.1016/S0899-8256(03)00045-9},
+	abstract = {We examine the effect of false-name bids on combinatorial auction protocols. False-name bids are bids submitted by a single bidder using multiple identifiers such as multiple e-mail addresses. The obtained results are summarized as follows: (1) the Vickrey-Clarke-Groves (VCG) mechanism, which is strategy-proof and Pareto efficient when there exists no false-name bid, is not false-name-proof; (2) there exists no false-name-proof combinatorial auction protocol that satisfies Pareto efficiency; (3) one sufficient condition where the VCG mechanism is false-name-proof is identified, i.e., the concavity of a surplus function over bidders. © 2003 Elsevier Inc. All rights reserved.},
+	number = {1},
+	journal = {Games and Economic Behavior},
+	author = {Yokoo, Makoto and Sakurai, Yuko and Matsubara, Shigeo},
+	year = {2004},
+	note = {Publisher: Academic Press Inc.},
+	keywords = {Auction, Mechanism design, Strategy-proof},
+	pages = {174--188},
+	file = {PDF:/home/velocitatem/Zotero/storage/LUVQV6WT/Yokoo04.pdf:application/pdf},
+}
+
+@inproceedings{feldman_free-riding_2004,
+	title = {Free-riding and whitewashing in peer-to-peer systems},
+	isbn = {1-58113-942-X},
+	doi = {10.1145/1016527.1016539},
+	abstract = {We develop a model to study the phenomenon of free-riding in peer-to-peer (P2P) systems. At the heart of our model is a user of a certain type, an intrinsic and private parameter that reflects the user's willingness to contribute resources to the system. A user decides whether to contribute or free-ride based on how the current contribution cost in the system compares to her type. When the societal generosity (i.e., the average type) is low, intervention is required in order to sustain the system. We present the effect of mechanisms that exclude low type users or, more realistic, penalize free-riders with degraded service. We also consider dynamic scenarios with arrivals and departures of users, and with whitewashers: users who leave the system and rejoin with new identities to avoid reputational penalties. We find that when penalty is imposed on all newcomers in order to avoid whitewashing, system performance degrades significantly only when the turnover rate among users is high.},
+	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2004 {Workshops}},
+	publisher = {Association for Computing Machinery},
+	author = {Feldman, Michal and Papadimitriou, Christos and Chuang, John and Stoica, Ion},
+	year = {2004},
+	keywords = {Cheap pseudonyms, Cooperation, Equilibrium, Exclusion, Free-riding, Identity cost, Incentives, Peer-to-peer, Whitewashing},
+	pages = {228--235},
+	file = {PDF:/home/velocitatem/Zotero/storage/K32WH6SB/1016527.1016539.pdf:application/pdf},
+}
+
+@article{calvano_artificial_2018,
+	title = {Artificial {Intelligence}, {Algorithmic} {Pricing} and {Collusion}},
+	url = {https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3304991},
+	doi = {10.2139/ssrn.3304991},
+	journal = {SSRN Electronic Journal},
+	author = {Calvano, Emilio and Calzolari, Giacomo and Denicolo, Vincenzo and Pastorello, Sergio},
+	year = {2018},
+	file = {PDF:/home/velocitatem/Zotero/storage/WYTSSZBR/ssrn-3304991.pdf:application/pdf},
+}
+
+@techreport{varian_economic_1995,
+	title = {Economic {Mechanism} {Design} for {Computerized} {Agents}},
+	abstract = {The field of economic mechanism design has been an active area of research in economics for at least 20 years. This field uses the tools of economics and game theory to design ``rules of interaction'' for economic transactions that will, in principle, yield some desired outcome. In this paper I provide an overview of this subject for an audience interested in applications to electronic commerce and discuss some special problems that arise in this context.},
+	author = {Varian, Hal R},
+	year = {1995},
+	file = {PDF:/home/velocitatem/Zotero/storage/S8635QX6/varian95a.pdf:application/pdf},
+}
+
+@book{russell_artificial_nodate,
+	title = {Artificial {Intelligence} {A} {Modern} {Approach} {Fourth} {Edition} {Global} {Edition}},
+	isbn = {978-1-292-40117-1},
+	author = {Russell, Stuart and Norvig, Peter},
+	file = {PDF:/home/velocitatem/Zotero/storage/6B8W8S27/efdd4d1d4c2087fe1cbe03d9ced67f34.pdf:application/pdf},
+}
+
+@techreport{wellman_price_2004,
+	title = {Price {Prediction} in a {Trading} {Agent} {Competition} {Yevgeniy} {Vorobeychik}},
+	abstract = {The 2002 Trading Agent Competition (TAC) presented a challenging market game in the domain of travel shopping. One of the pivotal issues in this domain is uncertainty about hotel prices, which have a significant influence on the relative cost of alternative trip schedules. Thus, virtually all participants employ some method for predicting hotel prices. We survey approaches employed in the tournament, finding that agents apply an interesting diversity of techniques, taking into account differing sources of evidence bearing on prices. Based on data provided by entrants on their agents' actual predictions in the TAC-02 finals and semifinals, we analyze the relative efficacy of these approaches. The results show that taking into account game-specific information about flight prices is a major distinguishing factor. Machine learning methods effectively induce the relationship between flight and hotel prices from game data, and a purely analytical approach based on competitive equilibrium analysis achieves equal accuracy with no historical data. Employing a new measure of prediction quality, we relate absolute accuracy to bottom-line performance in the game.},
+	author = {Wellman, Michael P and Reeves, Daniel M and Lochner, Kevin M and Vorobeychik, Yevgeniy},
+	year = {2004},
+	note = {Publication Title: Journal of Artificial Intelligence Research
+Volume: 21},
+	pages = {19--36},
+	file = {PDF:/home/velocitatem/Zotero/storage/N9JNXFJW/live-1333-2265-jair.pdf:application/pdf},
+}
+
+@techreport{shoham_multiagent_nodate,
+	title = {Multiagent {Systems}: {Algorithmic}, {Game}-{Theoretic}, and {Logical} {Foundations}},
+	url = {http://www.masfoundations.org},
+	author = {Shoham, Yoav and Leyton-Brown, Kevin},
+	keywords = {algorithms, auctions, communication, competition, cooperation, distributed problem solving, game theory, learning, logic, mechanism design, social choice},
+	file = {PDF:/home/velocitatem/Zotero/storage/QZVYS7V9/shoham09a.pdf:application/pdf},
+}
+
+@article{xia_evaluation-driven_2025,
+	title = {Evaluation-{Driven} {Development} and {Operations} of {LLM} {Agents}: {A} {Process} {Model} and {Reference} {Architecture}},
+	url = {http://arxiv.org/abs/2411.13768},
+	abstract = {Large Language Models (LLMs) have enabled the emergence of LLM agents, systems capable of pursuing under-specified goals and adapting after deployment. Evaluating such agents is challenging because their behavior is open ended, probabilistic, and shaped by system-level interactions over time. Traditional evaluation methods, built around fixed benchmarks and static test suites, fail to capture emergent behaviors or support continuous adaptation across the lifecycle. To ground a more systematic approach, we conduct a multivocal literature review (MLR) synthesizing academic and industrial evaluation practices. The findings directly inform two empirically derived artifacts: a process model and a reference architecture that embed evaluation as a continuous, governing function rather than a terminal checkpoint. Together they constitute the evaluation-driven development and operations (EDDOps) approach, which unifies offline (development-time) and online (runtime) evaluation within a closed feedback loop. By making evaluation evidence drive both runtime adaptation and governed redevelopment, EDDOps supports safer, more traceable evolution of LLM agents aligned with changing objectives, user needs, and governance constraints.},
+	author = {Xia, Boming and Lu, Qinghua and Zhu, Liming and Xing, Zhenchang and Zhao, Dehai and Zhang, Hao},
+	month = nov,
+	year = {2025},
+	note = {arXiv: 2411.13768},
+	file = {PDF:/home/velocitatem/Zotero/storage/H8IS64AW/2411.13768v2.pdf:application/pdf},
+}
+
+@techreport{xie_osworld_nodate,
+	title = {{OSWORLD}: {Benchmarking} {Multimodal} {Agents} for {Open}-{Ended} {Tasks} in {Real} {Computer} {Environments}},
+	url = {https://os-world.github.io},
+	abstract = {Autonomous agents that accomplish complex computer tasks with minimal human interventions have the potential to transform human-computer interaction, significantly enhancing accessibility and productivity. However, existing benchmarks either lack an interactive environment or are limited to environments specific to certain applications or domains, failing to reflect the diverse and complex nature of real-world computer use, thereby limiting the scope of tasks and agent scalability. To address this issue, we introduce OSWORLD, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems such as Ubuntu, Windows, and macOS. OSWORLD can serve as a unified, integrated computer environment for assessing open-ended computer tasks that involve arbitrary applications. Building upon OSWORLD, we create a benchmark of 369 computer tasks involving real web and desktop apps in open domains, OS file I/O, and workflows spanning multiple applications. Each task example is derived from real-world computer use cases and includes a detailed initial state setup configuration and a custom execution-based evaluation script for reliable, reproducible evaluation. Extensive evaluation of state-of-the-art LLM/VLM-based agents on OSWORLD reveals significant deficiencies in their ability to serve as computer assistants. While humans can accomplish over 72.36\% of the tasks, the best model achieves only 12.24\% success, primarily struggling with GUI grounding and operational knowledge. Comprehensive analysis using OSWORLD provides valuable insights for developing multimodal generalist agents that were not possible with previous benchmarks. Our code, environment, baseline models, and data are publicly available at https://os-world.github.io.},
+	author = {Xie, Tianbao and Zhang, Danyang and Chen, Jixuan and Li, Xiaochuan and Zhao, Siheng and Cao, Ruisheng and Jing Hua, Toh and Cheng, Zhoujun and Shin, Dongchan and Lei, Fangyu and Liu, Yitao and Xu, Yiheng and Zhou, Shuyan and Savarese, Silvio and Xiong, Caiming and Zhong, Victor and Yu, Tao},
+	note = {arXiv: 2404.07972v2},
+	file = {PDF:/home/velocitatem/Zotero/storage/LLRKXIC7/full-text.pdf:application/pdf},
+}
+
+@techreport{imperva_rapid_2025,
+	title = {The {Rapid} {Rise} of {Bots} and the {Unseen} {Risk} for {Business} \#{2025BADBOTREPORT}},
+	author = {{Imperva}},
+	year = {2025},
+	file = {PDF:/home/velocitatem/Zotero/storage/AWR9IQRD/2025-Bad-Bot-Report.pdf:application/pdf},
+}
+
+@article{perez-ricardo_exploring_2025,
+	title = {Exploring booking intentions through price elasticity of demand in tourism accommodations using large-scale data analytics},
+	volume = {31},
+	issn = {24448834},
+	doi = {10.1016/j.iedeen.2025.100271},
+	abstract = {The study aims to explore tourists' booking intentions by analyzing the price elasticity of demand in tourist accommodations. This analysis should reveal how changes in price affect booking behavior across different customer segments, using online booking records. A dataset was compiled from 106 hotels in Malaga, Spain, comprising 27,910 online bookings sourced exclusively from hotel websites. To understand the price elasticity of demand, a simple log-log regression was applied, segmenting the data based on key revenue-related variables. Subsequently, a cluster segmentation was performed using the Elbow method and K-means algorithm to identify distinct market segments. The findings highlighted that Family Travelers and Short Stay Travelers segments exhibited elastic demand, indicating higher sensitivity to price fluctuations. In contrast, Early Bookers and Mid-Season Long Stayers demonstrated inelastic demand, with lower responsiveness to changes in tourist accommodation prices. The number of variables analyzed in this study, along with the cluster analysis, represent a novelty and contribute to the existing literature on market segmentation and price elasticity of demand. This integration enriches both fields of research, offering mutual benefits and deeper insights that enhance the understanding of booking intention and pricing strategies.},
+	number = {1},
+	urldate = {2025-11-28},
+	journal = {European Research on Management and Business Economics},
+	author = {Pérez-Ricardo, Elizabeth del Carmen and García-Mestanza, Josefa},
+	month = jan,
+	year = {2025},
+	note = {Publisher: European Academy of Management and Business Economics},
+	keywords = {Booking intention, Price elasticity, Tourist segmentation},
+	file = {PDF:/home/velocitatem/Zotero/storage/QNXZJLRM/S2444883425000038.pdf:application/pdf},
+}
+
+@misc{ghaffary_amazon_nodate,
+	title = {Amazon {Sues} to {Stop} {Perplexity} {From} {Using} {AI} {Tool} to {Buy} {Stuff}},
+	url = {https://www.bloomberg.com/news/articles/2025-11-04/amazon-demands-perplexity-stop-ai-agent-from-making-purchases},
+	author = {Ghaffary, Shirin and Day, Matt},
+	file = {PDF:/home/velocitatem/Zotero/storage/IQL6FPWE/Amazon Sues to Stop Perplexity From Using AI Tool to Buy Stuff - Bloomberg.pdf:application/pdf},
+}
+
+@techreport{besbes_dynamic_nodate,
+	title = {Dynamic {Pricing} {Without} {Knowing} the {Demand} {Function}: {Risk} {Bounds} and {Near}-{Optimal} {Algorithms} *},
+	abstract = {We consider a single product revenue management problem where, given an initial inventory, the objective is to dynamically adjust prices over a finite sales horizon to maximize expected revenues. Realized demand is observed over time, but the underlying functional relationship between price and mean demand rate that governs these observations (otherwise known as the demand function or demand curve), is not known. We consider two instances of this problem: i.) a setting where the demand function is assumed to belong to a known parametric family with unknown parameter values; and ii.) a setting where the demand function is assumed to belong to a broad class of functions that need not admit any parametric representation. In each case we develop policies that learn the demand function "on the fly," and optimize prices based on that. The performance of these algorithms is measured in terms of the regret: the revenue loss relative to the maximal revenues that can be extracted when the demand function is known prior to the start of the selling season. We derive lower bounds on the regret that hold for any admissible pricing policy, and then show that our proposed algorithms achieve a regret that is "close" to this lower bound. The magnitude of the regret can be interpreted as the economic value of prior knowledge on the demand function; manifested as the revenue loss due to model uncertainty.},
+	author = {Besbes, Omar and Zeevi, Assaf},
+	note = {Publication Title: Operations Research},
+	keywords = {learning, asymptotic analysis, estimation, exploration-exploitation, pricing, Revenue management, value of information},
+	file = {PDF:/home/velocitatem/Zotero/storage/SBAIB4V2/Dp_wo_demand_risk_ob_az_posted.pdf:application/pdf},
+}
+
+@techreport{markntel_advisors_global_2025,
+	address = {Noida, Uttar Pradesh, India},
+	title = {Global {AI} {Agent} {Market} {Research} {Report}: {Forecast} (2026–2032)},
+	url = {https://www.marknteladvisors.com/research-library/ai-agent-market.html},
+	urldate = {2025-12-12},
+	institution = {MarkNtel Advisors},
+	author = {{MarkNtel Advisors}},
+	year = {2025},
+}
+
+@article{amjad_censored_2017,
+	title = {Censored {Demand} {Estimation} in {Retail}},
+	volume = {1},
+	url = {https://par.nsf.gov/servlets/purl/10066022},
+	doi = {10.1145/3154489},
+	abstract = {In this paper, the question of interest is estimating true demand of a product at a given store location and time period in the retail environment based on a single noisy and potentially censored observation. To address this question, we introduce a non-parametric framework to make inference from multiple time series. Somewhat surprisingly, we establish that the algorithm introduced for the purpose of "matrix completion" can be used to solve the relevant inference problem. Specifically, using the Universal Singular Value Thresholding (USVT) algorithm [7], we show that our estimator is consistent: the average mean squared error of the estimated average demand with respect to the true average demand goes to 0 as the number of store locations and time intervals increase to $\infty$. We establish naturally appealing properties of the resulting estimator both analytically as well as through a sequence of instructive simulations. Using a real dataset in retail (Walmart), we argue for the practical relevance of our approach.},
+	number = {2},
+	urldate = {2025-11-12},
+	journal = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
+	author = {Amjad, Muhammad J. and Shah, Devavrat},
+	month = dec,
+	year = {2017},
+	note = {Publisher: Association for Computing Machinery (ACM)},
+	pages = {1--28},
+	file = {PDF:/home/velocitatem/Zotero/storage/5ZYADDT4/10066022.pdf:application/pdf},
+}
+
+@misc{ganie_uncertainty_2025,
+	title = {Uncertainty in {Authorship}: {Why} {Perfect} {AI} {Detection} {Is} {Mathematically} {Impossible}},
+	shorttitle = {Uncertainty in {Authorship}},
+	url = {http://arxiv.org/abs/2509.11915},
+	doi = {10.48550/arXiv.2509.11915},
+	abstract = {As large language models (LLMs) become more advanced, it is increasingly difficult to distinguish between human-written and AI-generated text. This paper draws a conceptual parallel between quantum uncertainty and the limits of authorship detection in natural language. We argue that there is a fundamental trade-off: the more confidently one tries to identify whether a text was written by a human or an AI, the more one risks disrupting the text's natural flow and authenticity. This mirrors the tension between precision and disturbance found in quantum systems. We explore how current detection methods--such as stylometry, watermarking, and neural classifiers--face inherent limitations. Enhancing detection accuracy often leads to changes in the AI's output, making other features less reliable. In effect, the very act of trying to detect AI authorship introduces uncertainty elsewhere in the text. Our analysis shows that when AI-generated text closely mimics human writing, perfect detection becomes not just technologically difficult but theoretically impossible. We address counterarguments and discuss the broader implications for authorship, ethics, and policy. Ultimately, we suggest that the challenge of AI-text detection is not just a matter of better tools--it reflects a deeper, unavoidable tension in the nature of language itself.},
+	language = {en},
+	urldate = {2026-01-05},
+	publisher = {arXiv},
+	author = {Ganie, Aadil Gani},
+	month = sep,
+	year = {2025},
+	note = {arXiv:2509.11915 [cs]},
+	keywords = {Computer Science - Computation and Language},
+	file = {PDF:/home/velocitatem/Zotero/storage/3Z2XK4QC/Ganie - 2025 - Uncertainty in Authorship Why Perfect AI Detection Is Mathematically Impossible.pdf:application/pdf},
+}
diff --git a/paper/src/chapters/01-intro.tex b/paper/src/chapters/01-intro.tex
index 23fa1a6..b40e3fc 100644
--- a/paper/src/chapters/01-intro.tex
+++ b/paper/src/chapters/01-intro.tex
@@ -8,9 +8,50 @@
 
 \section{Introduction}
 
-Research Objectives and Contribution: What are we making, why and who should care?
+In this paper we present an exploration of, and defense against, the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove separability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned separability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
+
+This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned separability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium.
 
 \subsection{Motivation and Market Context}
-Current market dynamics and trends of dynamic pricing and AI agents. Future projections of AI agents. Key stakeholders that are discussing this and reporting on it (Thales). Who is most affected
+
+The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \cite{xia_evaluation-driven_2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \cite{xie_osworld_nodate}. The ``AI Agent'' market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \cite{markntel_advisors_global_2025}.
+
+The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \cite{imperva_rapid_2025}.
+
+The industry has already seen legal action in cases like Amazon against Perplexity \cite{ghaffary_amazon_nodate}, stemming from the difficulty of identifying traffic from hybrid systems like the Comet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
+
 \subsection{Solution Space Overview}
-Different approaches and perspectives, here also add a preview of what will be developed and explored in the lit review.
+Dynamic pricing systems, as presented in \cite{mueller_low-rank_2019}, often deal with sparse low-rank data of demand signals which, combined with contamination from agents, creates complex interactions that impact pricing. To further complicate the problem, certain commercial settings such as the one presented in \cite{amjad_censored_2017} must address the true demand of products under censored observations. This provides a formulation for handling demand in our case with multiple kinds of commercial mediators: $\hat{q} \gets q_A + q_H$, where $q_A$ represents the distribution of demand generated by agentic mediators and $q_H$ represents that of true human demand; these are two distinct populations with divergent objective functions.
+
+We formally define interaction data as coming from some actor which can either be an agent ($A$) or human ($H$). For purposes of this research, an agent is an algorithmic loop with the ability to access a web platform and perform actions such as clicks, scrolls, and input field fills. The loop terminates when the internal large language model judges the provided task definition as complete. A detailed breakdown can be found in \cref{algagent-loop}.
+
+
+\begin{algorithm}[t]
+\DontPrintSemicolon
+
+\SetKwInOut{Input}{Input}
+\SetKwInOut{Output}{Output}
+
+\Input{Goal $G$, Platform URL $u$, LLM $\mathcal{M}$}
+\Output{Task completion result $r$}
+
+Initialize browser instance $\mathcal{B}$ with connection to $u$\;
+Construct prompt $\pi \gets \textsc{BuildPrompt}(G, u)$\;
+$\text{done} \gets \text{False}$\;
+
+\While{$\neg \text{done}$}{
+    Observe current page state $s_t$ from $\mathcal{B}$\;
+    Query $\mathcal{M}$ with $(\pi, s_t)$ to determine next action $a_t \in \{\text{click}, \text{scroll}, \text{fill}, \text{navigate}\}$\;
+    Execute $a_t$ on $\mathcal{B}$ to transition to state $s_{t+1}$\;
+    $\text{done} \gets \mathcal{M}.\textsc{JudgeCompletion}(G, s_{t+1})$\;
+}
+
+Extract final result $r$ from terminal state\;
+\Return{$r$}\;
+
+\caption{AI Agent's Interaction Loop}
+\label{algagent-loop}
+\end{algorithm}
+
+
+The previously described goal of separability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \cite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions. In this setting we must learn to make decisions that perform well not under a single estimated probability distribution, but under an ambiguity set of plausible distributions about which we have only limited information. In our case, as stated above, the observed demand is a mixture of distributions with a parameter that is unknown and non-stationary.
diff --git a/paper/src/chapters/02-literature-review.tex b/paper/src/chapters/02-literature-review.tex
index 6395206..566d03f 100644
--- a/paper/src/chapters/02-literature-review.tex
+++ b/paper/src/chapters/02-literature-review.tex
@@ -1,15 +1,44 @@
 \section{Literature Review}
 
-\subsection{Foundational Concepts}
+To better understand all facets of the work, we must start by exploring the nature of agents, agentic computer use, and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems further creates a threat of false-name bidding \cite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents, which can ignore defensive mechanisms by re-entry with different identities \cite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
+
+\subsection{Agent Taxonomy and Definitions}
+
+An agent in the context of artificial intelligence is generally defined as anything that can reason and act upon observations of its environment (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around them and taking actions to advance specified goals. This definition by \cite{russell_artificial_nodate} is further developed in an economic context by \cite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
+A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes \cite{xia_evaluation-driven_2025}.
+
+We must, however, acknowledge that the current state of the art (SOTA), as presented by the OSWORLD simulations in \cite{xie_osworld_nodate}, shows that on multi-modal tasks across desktop and web interaction modes the top-performing model achieves only 12.24\% success, whereas humans achieve a much higher 72.36\% success rate. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
+
+We model an agent session as producing events with lower in-session conversion levels relative to humans; we state this in our assumption that $P(\text{purchase} \mid A) \ll P(\text{purchase} \mid H)$, but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation.
+
+\subsection{Economic Agents: From Homo Economicus to Machina Economicus}
+
+Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as is embodied in the term \textit{homo economicus}. The definition of a \textit{machina economicus} by \cite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \cite{varian_economic_1995}. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes \cite{xie_osworld_nodate}. Agents are, however, not to be confused with web-bots, which have previously been known as automated software applications or scrapers that are set up to carry out specific tasks on the internet, without a higher level of internal judgement \cite{imperva_rapid_2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
+
+This economic framing also helps separate two related but distinct phenomena: agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution. We do not, however, dismiss the proposed expectation that existing economic systems serving humans will come to be populated by AIs across multiple channels and with various possibly misaligned goals, as stated by \cite{parkes_economic_2015}.
 
-What is the taxonomy and definition of an agent and an actor in this case, a bit more about interaction models in sessions and about dynamic pricing algorithms.
 
 \subsection{Problem Evidence and Market Impact}
-Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
 
-\subsection{Theoretical Foundations: Economic Prallels}
+The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measurable impact on skewing key performance indicators by behavior visible in the look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to pose a significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand as shown in \cite{amjad_censored_2017} can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data \cite{imperva_rapid_2025}.
+
+When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk of inaccurate price inferences grows significantly. The emergent mitigation driven by uninformed reward and regret signals might lead to price suppression for sales continuity, which harms margins and results in a revenue loss. Systems that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving the targeted business strategy \cite{mullapudi_reinforcement_nodate}.
+
+
+%Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
+
+\subsection{Theoretical Foundations: Economic Parallels}
+
+
+
+Early hints of price exploration appear in the analysis of the standard English auction in \cite{varian_economic_1995}, which points to the exploration of prices in a sequential manner, leading to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incurred by the buyer for their actions or for exploring prices in the market. The author proposes that any agent responsible for the pricing of a good must be immune to dynamic strategies which might extract private information from a market. A key take-away, which relates to the Vickrey auction mechanism (also called a \textit{direct mechanism}), suggests that not only would defenses against such exploitation be necessary, but also the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce.
+
+As in classical revenue-maximizing auctions \cite{roughgarden_cs364a_2013}, we assume that the human actor in our system has a private valuation $v$ which we formally draw from later defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems.
+
+% Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
+
+% Link Coasean Singularity and other economic market theory and highlight specific information of supra competitive pricing.
 
-Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
 
 \subsection{Landscape of Existing Work}
 
diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex
index dd21186..7b4d3f4 100644
--- a/paper/src/chapters/03-methodology.tex
+++ b/paper/src/chapters/03-methodology.tex
@@ -1,68 +1,251 @@
 \section{Methodology}
 
+This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven separability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
 
 \subsection{Problem Formalization}
 
-Mathematical formalization of agent-induced pricing distortions. Formal definition of potential loss mechanisms $\alpha D$
+We define a commercial environment where the platform interacts with a stream of sessions. Let $\mathcal{S}$ denote the set of all sessions. Each session $s \in \mathcal{S}$ is generated by an actor belonging to a latent class $Y_s \in \{H, A\}$, where $H$ denotes Human and $A$ denotes Agent.
 
-We consider a business across time during which we have an evolving vector $p_t \in \Re^N$ where $N$ is the number of products in our catalogue. our price vector is directly dependent on a demand function $q_t$ which we define as a linear method of a price elasticity matrix $B_t$. This is the same setup that Microsoft created in their research.
+Each session produces a trajectory of observable events $\tau_s = (e_{s,1}, \ldots, e_{s,L_s})$. An event $e_{s,k}$ is a tuple defined as:
+\begin{equation}
+e_{s,k} = (a_{s,k}, i_{s,k}, t_{s,k})
+\end{equation}
+where:
+\begin{itemize}
+    \item $a_{s,k} \in \mathcal{A}$ is the action taken (e.g., \texttt{view\_item}, \texttt{add\_to\_cart}).
+    \item $i_{s,k} \in \{1, \ldots, N\}$ is the target item index.
+    \item $t_{s,k} \in \mathbb{R}_+$ is the continuous timestamp.
+\end{itemize}
 
-We gether interaction data from users interacting with a sample platform simulating a hotel/airline which generates interaction distributions $I_t = \{(p_t, q_t^\text{obs}, \pi_t)\}_{t=1}^T$
+The platform does not directly observe the true underlying demand function $d(p)$. Instead, it observes a behavioral proxy $\hat{q}_t$, which is a composite signal derived from the mixture of actor types. We define the demand proxy for product $i$ at epoch $t$ as a weighted aggregation of events:
+\begin{equation}
+\hat{q}_{t,i} = \sum_{s \in \mathcal{S}_t} \sum_{k=1}^{L_s} \omega(a_{s,k}) \cdot \mathbb{1}[i_{s,k} = i]
+\end{equation}
+where $\omega: \mathcal{A} \to \mathbb{R}_+$ assigns weights to actions based on their signal strength regarding willingness to pay.
+
+\subsubsection{Actor Types and Demand Curves}
+We formalize the heterogeneity of actors by introducing a type space $\Theta$. An actor of class $Y_s$ is further parameterized by a type $\theta \sim \mathcal{D}_{Y}$. This type determines the actor's demand response function $d(p; \theta)$, sampled from a distribution of possible demand curves. The total observed demand is a stochastic process governed by the mixture:
+\begin{equation}
+Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p; \theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p; \theta)] + \epsilon_t
+\end{equation}
+where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of agents) and $\epsilon_t$ is non-stationary market noise.
 
 
-\subsection{Cost of Information Framework}
 
-Mathematical demonstration and validation of the COI and citation backed evidence, and framework overview + show harm to user via other cost distortions. Maybe split into 3.2.1 (COI Theory) and 3.2.2 (Framework Design)
+\subsection{Cost of Information (COI) Framework}
+
+The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost). This also speaks to the financial urgency as a consequence of information asymmetry between the platform and the actors.
+
+\begin{definition}[Cost of Information]
+Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The COI is defined as:
+\begin{align}
+\text{COI} &= \mathbb{E}[P] - \underline{p} \\
+            &= \int_{\underline{p}}^{\bar{p}} (1 - F_\pi(p)) \, dp
+\end{align}
+where $F_\pi(p)$ is the cumulative distribution function of prices generated by $\pi$ under standard operating conditions.
+\end{definition}
 
-\subsection{System Architecture}
 \begin{figure}[ht]
-\centering
-\begin{tikzpicture}[
-  node distance=1.5cm and 2.5cm,
-  box/.style={rectangle, draw, thick, minimum height=1cm, minimum width=3cm, align=center, fill=blue!10},
-  kafka/.style={rectangle, draw=orange, thick, minimum height=1cm, minimum width=3cm, align=center, fill=orange!15},
-  arrow/.style={thick,->,>=Stealth}
-]
+    \centering
+    \begin{tikzpicture}[scale=1.2]
+        % Define the Gaussian function: centered at 2
+        \def\bellcurve(#1){1.5 * exp(-0.5*((#1-2)/0.6)^2)}
 
-% Nodes
-\node[box] (webapp) {Web Application \\ (Producer \& Consumer)};
-\node[kafka, below=of webapp] (kafka) {Apache Kafka \\ Cluster};
-\node[box, below=of kafka] (backend) {Backend Services / Microservices \\ (Producers and Consumers)};
+        % Draw the main axis
+        \draw[->, thick] (0, 0) -- (4.5, 0) node[right] {$p$};
+        \draw[->, thick] (0, 0) -- (0, 2) node[above] {Density};
 
-% Connections
-\draw[arrow] (webapp) to[out=210,in=150] node[above]{Publish} (kafka);
-\draw[arrow] (kafka) to[out=50,in=330] node[below]{Consume} (webapp);
-\draw[arrow] (backend) -- node[above]{Publish/Consume} (kafka);
+        \draw[thick, smooth, samples=100] plot[domain=0:4] (\x, {\bellcurve(\x)});
+        \node at (3.2, 1.2) {$f_\pi(p)$};
 
-% Optional: Kafka internal components
-%\node[below=0.7cm of kafka, align=center] (topics) {Topics \\ Partitions};
+        % Define p_min and E[p]
+        \def\pmin{0.8}
+        \def\mean{2}
 
-% Optional background
-\begin{scope}[on background layer]
-  \node[draw, rounded corners, fill=orange!5, fit=(kafka), inner sep=0.3cm] {};
-\end{scope}
-\end{tikzpicture}
-\caption{Technical Diagram}
+        % Vertical lines
+        \draw[dashed] (\pmin, 0) -- (\pmin, 2.0);
+        \draw[dashed] (\mean, 0) -- (\mean, 2.0);
+
+        % Labels on axis
+        \node[below] at (\pmin, 0) {$\underline{p}$};
+        \node[below] at (\mean, 0) {$\mathbb{E}[p]$};
+
+        \draw[<->, thick, red] (\pmin, 2.0) -- (\mean, 2.0) node[midway, above] {COI};
+
+    \end{tikzpicture}
+    \caption{Illustration of the Cost of Information (COI). The COI is defined as the difference between the expected price $\mathbb{E}[p]$ realized by the policy and the minimum viable price $\underline{p}$.}
+    \label{fig:coi_illustration}
 \end{figure}
 
-High level overview of how it works
+We now formally demonstrate that standard dynamic pricing mechanisms are not incentive-compatible with high-frequency agentic traffic. As the number of independent competitive agents $N$ querying the system grows, the platform's ability to sustain a COI vanishes.
+
+\begin{theorem}[COI Erosion in the Limit]
+Let $N$ be the number of independent, utility-maximizing agents querying the platform. Let $p_{(1)}$ be the first order statistic (minimum) of the prices offered to these agents. As $N \to \infty$, the Cost of Information converges to 0.
+\end{theorem}
+
+\begin{proof}
+Let $p_1, \ldots, p_N$ be independent and identically distributed (i.i.d.) price samples drawn from the policy's distribution $F(p)$ with support $[\underline{p}, \bar{p}]$. The realizable price for an optimal searching agent is the first order statistic $p_{(1)} = \min(p_1, \ldots, p_N)$.
+
+The survival function (or reliability function) of the minimum price is given by:
+\begin{equation}
+S_{p_{(1)}}(t) = P(p_{(1)} > t) = [1 - F(t)]^N
+\end{equation}
+
+To determine the expected value $\mathbb{E}[p_{(1)}]$, we recall the property that for any continuous random variable $X$ with support $[A, B]$, the expectation can be expressed as the lower bound plus the integral of the survival function:
+\begin{equation}
+\mathbb{E}[X] = A + \int_{A}^{B} P(X > t) \, dt
+\end{equation}
+
+Applying this to our pricing statistic where the lower bound is $\underline{p}$:
+\begin{align}
+\mathbb{E}[p_{(1)}] &= \underline{p} + \int_{\underline{p}}^{\bar{p}} P(p_{(1)} > t) \, dt \\
+&= \underline{p} + \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt
+\end{align}
+
+Since $\underline{p}$ is the lower endpoint of the support of $F$, for any $t > \underline{p}$ we have the strict inequality $F(t) > 0$, implying $0 \le 1 - F(t) < 1$. Consequently, as $N \to \infty$, the term $[1 - F(t)]^N$ converges to 0 pointwise for all $t > \underline{p}$.
+
+Applying the Lebesgue Dominated Convergence Theorem (noting that the integrand is bounded by 1 on the finite interval $[\underline{p}, \bar{p}]$):
+\begin{equation}
+\lim_{N \to \infty} \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt = \int_{\underline{p}}^{\bar{p}} 0 \, dt = 0
+\end{equation}
+
+Substituting this back into the expression for COI:
+\begin{align}
+\lim_{N \to \infty} \text{COI} &= \lim_{N \to \infty} (\mathbb{E}[p_{(1)}] - \underline{p}) \\
+&= \lim_{N \to \infty} \left( (\underline{p} + 0) - \underline{p} \right) \\
+&= 0
+\end{align}
+\end{proof}
+
+
+This result proves that standard pricing policies $\pi$ fail to extract surplus in the presence of large-scale agentic search, necessitating a robust counter-mechanism.
+
+% The DRO objective creates a lower bound on COI extraction, effectively guaranteeing a minimum margin even in the presence of adversarial agents. we need to prove this and demonstrate that in a theorem.
+
+
+%Mathematical demonstration and validation of the COI and citation backed evidence, and framework overview + show harm to user via other cost distortions. Maybe split into 3.2.1 (COI Theory) and 3.2.2 (Framework Design)
+
+\subsection{System Architecture: Hybrid Kappa-Lambda Architecture}
+
+In order for our research to have grounding in interactions we built a robust e-commerce web-platform. We initially conducted a survey of the leading platforms of airlines and hotel booking sites to identify the specific interface patterns that effectively manage complex travel data. Our analysis revealed a clear industry standard: while both sectors rely on tabbed service selection and left-sidebar filtering to streamline navigation, they diverge in result presentation: airlines utilize visual date-price bars and multi-step wizards to optimize for logistical transparency, whereas hotel platforms leverage image-led cards and scarcity triggers to drive emotional engagement and urgency. Our web framework defines a highly agnostic boilerplate which can be seeded with any data-modality with an easy-to-tailor pattern, which we leverage to define a \texttt{hotel} and \texttt{airline} mode. Both modes are then individually deployed via an environment level argument which adjusts the proxy routing with a custom middleware inside next.js to render only the desired mode. The purpose of this was to create a baseline adaptable to any use-case or desired commercial application.
+
+
+The architecture of this platform begins with the deployed web-apps posting interaction data to our backend, which processes them and stores each ingested interaction in a Kafka cluster. This serves as our data reservoir, tracking and associating each interaction with its session and, importantly, with the experiment to which it belongs. Not only do we track the behavioral interactions, but our pricing provider micro-service, once called by the frontend, also reports the observed/queried price-product into Kafka. This Kafka cluster is subscribed to by our pipeline, which is configured on a schedule in Airflow with the possibility of a manual trigger. The final stage of the pricing pipeline submits the computed dynamic pricing results into a Redis database for quick updates, from which the pricing provider reads them for display on the webapp. This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks. We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
+
+
+\subsubsection{DevOps Principles}
+
+\subsubsection{Online Dynamic Pricing}
+
+Dynamic pricing is handled by a pipeline which computes a demand estimate on a per-product basis over a specific window of the data, defined by the period $T$, which by default is 5 minutes. This pipeline computes a demand estimate vector $\hat{q} \in \mathbb{R}^N$ as a weighted sum of interactions for each product; it additionally computes a price elasticity vector $\hat{\epsilon}$ of the same dimension as the demand vector. The final feature matrix is of size $N \times 2$, which we translate to a new price vector $\hat{p} \in \mathbb{R}^N$. The transformation that governs this dynamic pricing is a very simple surge-based pricing rule (a special case of our later defined policy $\pi$):
+
+\begin{equation}
+\hat{p}_i = \begin{cases}
+p_{0,i} \cdot \lambda_{\text{surge}} & \text{if } \hat{q}_i \geq \theta_{\text{high}} \\
+p_{0,i} \cdot \lambda_{\text{disc}} & \text{if } \hat{q}_i \leq \theta_{\text{low}} \\
+p_{0,i} & \text{otherwise}
+\end{cases}
+\quad \forall i \in \{1, \ldots, N\}
+\end{equation}
+
+where $p_0 \in \mathbb{R}^N$ is the base price vector (which is seeded into our database distinctly for each mode of the commerce platform), $\theta_{\text{high}}, \theta_{\text{low}} \in \mathbb{R}$ are demand thresholds defining surge and discount regions, and $\lambda_{\text{surge}}, \lambda_{\text{disc}} \in \mathbb{R}^+$ are multiplicative factors with typical values $\lambda_{\text{surge}} = 1.2$ and $\lambda_{\text{disc}} = 0.9$. This piecewise function enables rapid price adjustment in response to observed demand without requiring complex elasticity estimation or historical calibration, allowing us to expose actors within our experiments to a system with a dynamic component of pricing.
+
+For our offline experiments, we generalize a master function encompassing distinct demand estimation and pricing strategies.
+
+\begin{align}
+V_t(\cdot) = \max_{p_t} \min_{Q \in \mathcal{U}(\hat{d})}{\mathbb{E}_{d\sim Q} [p_t \times d(p_t, x_t ; \theta) + \psi V_{t+1}(\cdot)]}
+\end{align}
+
+We follow different substitutions which will serve as hyperparameters later on.
+
 \subsection{Experimental Design}
-Study methodology and approach. Data acquisition strategy. Defined objectives and success criteria. Observable metrics and KPIs
 
-\subsection{Dynamic Pricing Algorithm Analysis}
-Deep dive into how the algorithm works, different kinds and justification for chosen appraoches + agent impact modeling and quantification.
-\subsection{Reinforcement Learning Formulation}
-How do we define the state space, action space and reward function breakdown and algorithm benchmarking.
-POSSIBLY: Expand into full subsections: 3.6.1 (State-Action Space), 3.6.2 (Reward Design), 3.6.3 (Benchmarking)
+The experimentation begins with the design of goals, with careful consideration to ensure uniform coverage across the different variables within each product architecture of either the hotel or airline platforms. Our crafted collection of goals (jobs to be done) is then tracked in a PostgreSQL database, with one table to track goals and another table to track experiment runs and their associated goals in an experiment-goal one-to-one relationship.
+
+This effort to gather data on interactions constitutes the first half of our research. With this collected data on behavioral characteristics, enhanced by our feature augmentation, we can separate the session distribution into two bins $y \in \{A,H\}$ with a certain probability $p$ dependent on the session-specific features. To address the second loop of our system, we use this gained capability of discrimination to enhance the learner design involved in our surrogate dynamic pricing task, which simulates an independent dynamic pricing scenario under which we can train a more controlled policy with the ability to account for true demand signals under conditions of contamination from non-human actors.
 
 
-\begin{algorithm}[t]
-\DontPrintSemicolon
-\KwIn{stepsize $\eta$, smoothing $\delta$, rank $d$}
-\For{$t=1$ \KwTo $T$}{
-  Sample $u_t$ on unit sphere; set $x_t^\prime=x_t+\delta u_t$\;
-  Set $p_t \gets U x_t^\prime$ and observe $q_t, R_t(p_t)$\;
-  $x_{t+1} \gets \Pi\_{\mathcal{X}}(x_t-\eta R_t(p_t) u_t)$\;
-}
-\caption{Online Pricing Optimization (template)}
-\end{algorithm}
+Our approach can be summarized as a three-stage division. First, we observe and \textit{vectorize} the behavioral interaction data from our experiments. We then develop the separability, which helps us deepen the semantic understanding of the behavioral patterns. Finally, we use our newly gained learner to leverage a defensive mechanism within the simulation stage of a controlled dynamic pricing loop.
+
+\begin{figure}[ht]
+  \resizebox{\columnwidth}{!}{%
+    \input{chapters/loop_figure.tex}
+  }
+  \caption{Overview of the Dynamic Pricing Tasks.}
+\end{figure}
+
+
+% Study methodology and approach. Data acquisition strategy. Defined objectives and success criteria. Observable metrics and KPIs.
+
+
+\subsection{Generative Contamination and Separability}
+
+To develop a robust pricing agent, we require a simulation environment capable of generating realistic, contaminated interaction data. We achieve this by learning from our Phantom platform data using a two-stage approach.
+
+
+
+\subsubsection{GOFAI-Based Separability}
+We employ Good Old-Fashioned AI (GOFAI) heuristics to generate initial weak labels for separability. We define a set of rule-based predicates $\phi_j: \tau \to \{0, 1\}$ to partition the dataset $\mathcal{D}$ into high-confidence sets $\mathcal{D}_H$ and $\mathcal{D}_A$. We construct a distinct MDP for each behavioral profile (humans and agents) and, from these, compute the Kullback--Leibler divergence $D_{KL}$ between them. Initial findings yield a KL divergence of $\approx 2.0236$ between the state transition probabilities shown in Figures~\ref{fig:human_mdp_viz} and~\ref{fig:agent_mdp_viz}.
+
+\begin{figure}[ht]
+    \centering
+    \includegraphics[width=0.8\textwidth]{chapters/mdp_human.pdf}
+    \caption{Markov Decision Process visualization illustrating the behavioral transition dynamics for human actions.}
+    \label{fig:human_mdp_viz}
+\end{figure}
+
+\begin{figure}[ht]
+    \centering
+    \includegraphics[width=0.8\textwidth]{chapters/mdp_agent.pdf}
+    \caption{Markov Decision Process visualization illustrating the behavioral transition dynamics for \textbf{agent} behavior profiles. The state space and transition probabilities are learned from observed session trajectories to enable generative contamination.}
+    \label{fig:agent_mdp_viz}
+  \end{figure}
+
+\subsubsection{Transition Probability Estimation}
+For both subsets, we model the session dynamics as a Markov Decision Process (MDP) and estimate the transition kernel $\mathcal{T}$. The probability of transitioning to state $s'$ given state $s$ is estimated via maximum likelihood:
+\begin{equation}
+    \hat{P}(s' \mid s) = \frac{N(s, s')}{\sum_{k \in \mathcal{S}} N(s, k)}
+\end{equation}
+where $N(s, s')$ is the count of observed transitions. This allows us to construct a \textit{Contamination Generator} $\mathcal{G}(\alpha)$. Given a clean trajectory dataset, $\mathcal{G}$ injects synthetic agent trajectories sampled from the learned transition matrix $\hat{P}_A$ until the effective mixing ratio reaches $\alpha$.
+
+\subsection{Distributionally Robust Reinforcement Learning (DR-RL)}
+
+We formulate the pricing problem as a Stackelberg Game where the Platform (Leader) sets prices $p_t$ and the Aggregate Demand (Follower) responds. However, the exact mixing parameter $\alpha$ and the demand distribution shift are non-stationary and unknown in online settings. Relying on a simple error term $\epsilon$ is insufficient. Instead, we adopt a Distributionally Robust Optimization (DRO) objective.
+
+\subsubsection{Ambiguity Set Construction}
+We define an ambiguity set $\mathcal{U}_\epsilon(\hat{P}_N)$ centered around our empirical reference distribution $\hat{P}_N$ (derived from the generator $\mathcal{G}$). We utilize the Wasserstein distance metric to define the set of plausible demand distributions the agent might face:
+\begin{equation}
+\mathcal{U}_\epsilon(\hat{P}_N) = \left\{ Q \in \mathcal{P}(\Xi) : W_p(Q, \hat{P}_N) \le \epsilon \right\}
+\end{equation}
+This set captures all distributions that are statistically close to our observed training data but allows for adversarial shifts (e.g., sudden bot spikes).
+
+\subsubsection{The Min-Max Objective}
+The robust policy $\pi^*$ is obtained by solving the maximin problem:
+\begin{equation}
+\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}(p) \right]
+\end{equation}
+where $R(p, d)$ is the revenue function and $\lambda$ weighs the penalty for information leakage (COI).
+
+\subsubsection{Actor Implementation}
+In our simulation, the ``Follower'' is implemented as a set of Actors. Each Actor is initialized with a type $\theta$ which samples a specific demand curve $d(p; \theta)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$.
+
+
+As part of our reward engineering we consider the UX factor ($UX \in [0,1]$), which is our proxy for user experience degradation; it is computed as a mixture of contributions from the separability model metric $\frac{1}{\text{Specificity}}$.
+
+\begin{figure}[ht]
+  \centering
+  \resizebox{0.5\columnwidth}{!}{%
+    \input{chapters/balance_figure.tex}
+  }
+  \caption{Introducing the UX index allows us to better distinguish the kind of impact different methods have and allows us to compare them on this Pareto-like scale.}
+\end{figure}
+
+We also need to consider a policy such as taxation of the agents, drawing on strategy-proof mechanism design, specifically the Vickrey--Clarke--Groves (VCG) payment rule. We link and prove that this would create an incentive under which truth-telling becomes the dominant strategy.
+
+\section{Heuristics as part of neuro-inspired steering systems}
+
+Steve Burns, superior colliculus (face heuristics) we create this sort of part of the 'brain' + amortized inference.
+
+We could say that a DQN, for example, is the learning subsystem, and then within our reward mechanism or some other computational method we introduce a steering subsystem which acts as the proposed ``pricing heuristic'' against the given non-human transaction data.
+
+\section{Market construction}
diff --git a/paper/src/chapters/05-discussion.tex b/paper/src/chapters/05-discussion.tex
index a2052a1..6cd6362 100644
--- a/paper/src/chapters/05-discussion.tex
+++ b/paper/src/chapters/05-discussion.tex
@@ -1,5 +1,15 @@
 \section{Discussion}
 
+\subsection{Transition to Agentic Market Microstructure}
+
+Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy. If we assume a transition toward a direct revelation mechanism, where actors must reveal their true valuation of a good through bidding dynamics, we inevitably introduce significant stochasticity into the pricing system. Unlike traditional e-commerce where prices are relatively sticky, such a mechanism implies a high volatility characteristic of financial equity markets (without the fungibility, however).
+
+However, e-commerce commodities differ fundamentally from financial securities: they possess a hard floor defined by unit economics and reservation prices. Although the market might react enthusiastically to an iPhone priced at \$1, such a transaction is not permissible. The platform must establish an initial valuation anchor ($P_{0}$) defined by the marginal cost plus a target margin, around which the market price is permitted to fluctuate. We propose the introduction of GenAI Agents as Institutional Market Makers.
+
+This is also under the assumption of expected transactional capabilities being given to AI Agents.
+
+
+
 \subsection{Risk Assessment and Limitations}
 
 Acknowledge risks and constraints and data sizes.
diff --git a/paper/src/chapters/06-conclusion.tex b/paper/src/chapters/06-conclusion.tex
index f923a49..c698e82 100644
--- a/paper/src/chapters/06-conclusion.tex
+++ b/paper/src/chapters/06-conclusion.tex
@@ -1,6 +1,6 @@
 \section{Conclusion}
 
-\subsection{Summary of contributions }
+\subsection{Summary of contributions}
 Restate the thesis and key findings with validation of research objectives.
 
 \subsection{Future Works and Next Steps}
diff --git a/paper/src/chapters/balance_figure.tex b/paper/src/chapters/balance_figure.tex
new file mode 100644
index 0000000..5565ba0
--- /dev/null
+++ b/paper/src/chapters/balance_figure.tex
@@ -0,0 +1,38 @@
+
+\begin{tikzpicture}[
+    % Styles for consistency
+    axis/.style={->, >=Stealth, line width=1.2pt, color=black!85},
+    curve/.style={color=black, line width=2.5pt},
+    point/.style={circle, fill=black, inner sep=0pt, minimum size=6pt},
+    label_text/.style={font=\large, align=center, color=black},
+    annotation_line/.style={thick, -, color=black!60}
+]
+
+    % Define Radius
+    \def\R{5}
+
+    % Draw Axes
+    % Extended slightly beyond radius (\R + 1.5)
+    \draw[axis] (0,0) -- (\R+1.5,0) node[midway, below=10pt, font=\bfseries\large] {UX Index};
+    \draw[axis] (0,0) -- (0,\R+1.5) node[midway, left=15pt, rotate=90, font=\bfseries\large] {Performance};
+
+    % Draw Perfect 1/4 Circle
+    % Syntax: arc (start_angle : end_angle : radius)
+    \draw[curve] (0,\R) arc (90:0:\R);
+
+    % 1. Paranoid (High Performance side) -> Angle 75 degrees
+    \node[point] (p1) at (75:\R) {};
+    \node[label_text, above right=0.1cm and 0.1cm of p1] (l1) {Paranoid};
+    \draw[annotation_line] (l1) -- (p1);
+
+    % 2. Perfect Detection (Exact Middle) -> Angle 45 degrees
+    \node[point] (p2) at (45:\R) {};
+    \node[label_text, above right=0.2cm and 0.2cm of p2] (l2) {Perfect Detection};
+    \draw[annotation_line] (l2) -- (p2);
+
+    % 3. No Detection (High UX side) -> Angle 15 degrees
+    \node[point] (p3) at (15:\R) {};
+    \node[label_text, right=0.5cm of p3] (l3) {No Detection};
+    \draw[annotation_line] (l3) -- (p3);
+
+\end{tikzpicture}
diff --git a/paper/src/chapters/feature_table.tex b/paper/src/chapters/feature_table.tex
new file mode 100644
index 0000000..302f2db
--- /dev/null
+++ b/paper/src/chapters/feature_table.tex
@@ -0,0 +1,65 @@
+\begin{table}[ht]
+\centering
+\small
+\resizebox{\columnwidth}{!}{%
+\begin{tabular}{p{4.5cm}p{1.5cm}p{6cm}}
+\hline
+\textbf{Feature} & \textbf{Type} & \textbf{Description} \\
+\hline
+\multicolumn{3}{l}{\textit{Session Identifiers}} \\
+sessionId & object & Unique identifier for user session \\
+experimentId & object & Experiment run identifier \\
+\hline
+\multicolumn{3}{l}{\textit{Temporal Features}} \\
+session\_duration\_sec & float & Total session duration in seconds \\
+avg\_time\_between\_events & float & Mean inter-event time \\
+std\_time\_between\_events & float & Standard deviation of inter-event times \\
+min\_time\_between\_events & float & Minimum time between consecutive events \\
+session\_start\_hour & int & Hour of day when session started \\
+\hline
+\multicolumn{3}{l}{\textit{Interaction Metrics}} \\
+total\_interactions & int & Count of all user interactions \\
+total\_events & int & Total number of tracked events \\
+interaction\_velocity & float & Rate of interactions per time unit \\
+max\_velocity\_5min & int & Peak interaction count in any 5-minute window \\
+\hline
+\multicolumn{3}{l}{\textit{Navigation Behavior}} \\
+unique\_pages & int & Number of distinct pages visited \\
+page\_views & int & Total page view events \\
+\hline
+\multicolumn{3}{l}{\textit{Product Engagement}} \\
+item\_views & int & Number of product detail views \\
+unique\_products\_viewed & int & Count of distinct products examined \\
+product\_view\_depth & int & Repeat views of same products \\
+\hline
+\multicolumn{3}{l}{\textit{Conversion Funnel}} \\
+cart\_adds & int & Number of items added to cart \\
+purchases & int & Completed transactions \\
+cart\_to\_view\_ratio & float & Ratio of cart additions to item views \\
+conversion\_rate & float & Purchase to view conversion \\
+\hline
+\multicolumn{3}{l}{\textit{Interaction Quality}} \\
+hover\_events & int & Mouse hover event count \\
+hover\_intensity & float & Hover events per interaction \\
+\hline
+\multicolumn{3}{l}{\textit{Price Behavior}} \\
+avg\_price\_seen & float & Mean price across viewed products \\
+min\_price\_seen & float & Lowest price encountered \\
+max\_price\_seen & float & Highest price encountered \\
+price\_range & float & Difference between max and min prices seen \\
+\hline
+\multicolumn{3}{l}{\textit{Technical Fingerprinting}} \\
+is\_headless & bool & Headless browser detection flag \\
+is\_automation & bool & Automation framework detection flag \\
+browser\_family & object & Browser type classification \\
+\hline
+\multicolumn{3}{l}{\textit{Experimental Labels}} \\
+is\_agent & bool & Ground truth agent classification \\
+xp\_human\_only & bool & Human-only experiment indicator \\
+xp\_market\_mode & object & Market context (hotel/airline) \\
+\hline
+\end{tabular}%
+}
+\caption{Feature matrix schema for session-level behavioral classification (32 features total).}
+\label{tab:features}
+\end{table}
diff --git a/paper/src/chapters/loop_figure.tex b/paper/src/chapters/loop_figure.tex
new file mode 100644
index 0000000..e90e018
--- /dev/null
+++ b/paper/src/chapters/loop_figure.tex
@@ -0,0 +1,110 @@
+\definecolor{mygreenfill}{RGB}{169, 234, 186}
+\definecolor{mygreenborder}{RGB}{29, 145, 61}
+\definecolor{mybluefill}{RGB}{204, 222, 255}
+\definecolor{myblueborder}{RGB}{66, 106, 189}
+\definecolor{mygray}{RGB}{150, 150, 150}
+
+
+
+\begin{tikzpicture}[
+    node distance=2cm,
+    % Style for Green Nodes
+    greenbox/.style={
+        rectangle,
+        draw=mygreenborder,
+        fill=mygreenfill,
+        line width=1.2pt,
+        align=center,
+        minimum height=1cm
+    },
+    % Style for Blue Nodes
+    bluebox/.style={
+        rectangle,
+        draw=myblueborder,
+        fill=mybluefill,
+        line width=1.2pt,
+        align=center,
+        minimum height=1cm
+    },
+    % Style for Arrows
+    myarrow/.style={
+        ->,
+        >={Stealth[length=3mm, width=2mm]},
+        draw=black!80,
+        line width=1.2pt,
+        rounded corners=5pt
+    },
+    % Style for Background Dashed Circles
+    dashedloop/.style={
+        dashed,
+        draw=mygray,
+        line width=1pt
+    }
+]
+
+    % --- Coordinate Layout ---
+    % Defining a grid relative to the center
+
+    % Left Loop (Green) Nodes
+    \node[greenbox, minimum width=3.5cm] (commerce) at (-3.5, 2) {Commerce Experiment};
+    \node[greenbox, minimum width=1.5cm] (raw) at (-6.5, 0) {Raw\\Logs};
+    \node[greenbox, minimum width=1.5cm] (features) at (-4, -2.5) {Features};
+    \node[greenbox, minimum width=2.5cm] (classification) at (-1, -0.5) {Classification\\Training A/H};
+
+    % Right Loop (Blue) Nodes
+    \node[bluebox, minimum width=2.5cm] (trainedpricing) at (3.2, 2) {Trained Pricing};
+    \node[bluebox, minimum width=2.5cm] (policy) at (6.5, 0) {Trained Pricing\\Policy};
+    \node[bluebox, minimum width=2.5cm] (rlgym) at (3.2, -2.2) {RL Gym\\Training};
+
+    % --- Background Dashed Loops ---
+    \begin{scope}[on background layer]
+        % Left Loop Circle
+        \draw[dashedloop] (-3.5, 0) ellipse (3.5cm and 2.8cm);
+        % Right Loop Circle
+        \draw[dashedloop] (3.5, 0) ellipse (3.5cm and 2.8cm);
+    \end{scope}
+
+    % --- Arrows: Loop One (Green) ---
+    % Commerce -> Raw Logs
+    \draw[myarrow] (commerce.west) to[out=180, in=90] (raw.north);
+
+    % Raw Logs -> Features
+    \draw[myarrow] (raw.south) to[out=270, in=180] (features.west);
+
+    % Features -> Classification
+    \draw[myarrow] (features.east) to[out=0, in=250] (classification.south);
+
+    % Classification -> Commerce (Closing the loop)
+    \draw[myarrow] (classification.north) to[out=110, in=0] (commerce.east);
+
+    % --- Arrows: Loop Two (Blue) ---
+    % Classification (Green) -> RL Gym (Blue) - Crossing over
+    \draw[myarrow] (classification.east) to[out=0, in=180] (rlgym.west);
+
+    % RL Gym -> Policy
+    \draw[myarrow] (rlgym.east) to[out=0, in=270] (policy.south);
+
+    % Policy -> Trained Pricing
+    \draw[myarrow] (policy.north) to[out=90, in=0] (trainedpricing.east);
+
+    % Trained Pricing -> Commerce (Crossing back)
+    \draw[myarrow] (trainedpricing.west) -- node[above, font=\small, yshift=2pt] {New Pricing} (commerce.east);
+
+    % --- Text Labels ---
+
+    % Loop One Label
+    \node[align=center] at (-3.8, 0) {Loop One:\\Data \textit{(Online)}};
+
+    % Loop Two Label
+    \node[align=center] at (3.5, 0) {Loop Two:\\Defense Gym \textit{(Offline)}};
+
+    % Bottom Legend
+    \node[font=\small] (taskA) at (-4, -4) {Dynamic Pricing Task A};
+    \node[font=\small] (taskB) at (4, -4) {Dynamic Pricing Task B};
+    \node[font=\small] (indep) at (0, -4) {Independent};
+
+    % Arrows for bottom legend
+    \draw[->, >=Stealth, thick, darkgray] (indep.west) -- (taskA.east);
+    \draw[->, >=Stealth, thick, darkgray] (indep.east) -- (taskB.west);
+
+\end{tikzpicture}
diff --git a/paper/src/chapters/mdp_agent.pdf b/paper/src/chapters/mdp_agent.pdf
new file mode 100644
index 0000000..0566be9
Binary files /dev/null and b/paper/src/chapters/mdp_agent.pdf differ
diff --git a/paper/src/chapters/mdp_human.pdf b/paper/src/chapters/mdp_human.pdf
new file mode 100644
index 0000000..7cef37a
Binary files /dev/null and b/paper/src/chapters/mdp_human.pdf differ
diff --git a/paper/src/main.tex b/paper/src/main.tex
index 80699a2..fd9298e 100644
--- a/paper/src/main.tex
+++ b/paper/src/main.tex
@@ -1,39 +1,30 @@
 % -*- TeX-master: t -*-
-\documentclass[sigconf,nonacm,natbib=false]{acmart}
+\documentclass[12pt,letterpaper]{article}
 
-% Remove ACM copyright/conference info for thesis
-\settopmatter{printacmref=false}
-\renewcommand\footnotetextcopyrightpermission[1]{}
 \pagestyle{plain}
 
 \input{preamble}
 
 \begin{document}
 
-\title{Pricing Heuristics Against Non-human Transaction Orchestration Mechanisms}
+\title{Adversarially Distributionally Robust Optimization and Reinforcement Learning for Informed Dynamic Pricing under Strategic Demand Contamination}
 
-\author{Daniel Rösel}
-\email{daniel@alves.world}
-\affiliation{%
-  \institution{IE University}
-  \city{Madrid}
-  \country{Spain}
+\author{
+  Daniel Rösel\thanks{Primary author and student researcher. Email: daniel@alves.world} \\
+  IE University, Madrid, Spain \\[1em]
+  Alberto Martín Izquierdo\thanks{Thesis advisor. Email: amartini@faculty.ie.edu} \\
+  IE University, Madrid, Spain
 }
 
-\author{Alberto Martín Izquierdo}
-\email{amartini@faculty.ie.edu}
-\affiliation{%
-  \institution{IE University}
-  \city{Madrid}
-  \country{Spain}
-}
-
-\begin{abstract}
-The primary objective of this thesis is to develop and validate pricing heuristics that protect e-commerce platforms from systematic exploitation by Large Language Model (LLM) agents within dynamic pricing environments. As AI agents increasingly mediate consumer transactions, they enable users to circumvent the Cost of Information (the price premium accumulated through demand signal expression) by conducting reconnaissance in isolated sessions before executing purchases through clean sessions at base prices. This research will make an anticipatory contribution by adapting recommendation system methodologies to distinguish between genuine human browsing behaviour and agent-orchestrated information gathering, thereby enabling pricing systems to maintain margin integrity without degrading the user experience for legitimate customers or getting rid of leads generated by LLMs.
-\end{abstract}
+\date{\today}
 
 \maketitle
 
+\begin{abstract}
+The primary objective of this thesis is to develop and validate pricing heuristics that protect e-commerce platforms from systematic exploitation by Large Language Model (LLM) agents within dynamic pricing environments. As AI agents increasingly mediate consumer transactions, they enable users to circumvent the Cost of Information (the price premium accumulated through demand signal expression) by conducting reconnaissance in isolated sessions before executing purchases through clean sessions at base prices. This research will make an anticipatory contribution by adapting recommendation system methodologies to distinguish between genuine human browsing behavior and agent-orchestrated information gathering, thereby enabling pricing systems to maintain margin integrity without degrading the user experience for legitimate customers or discarding leads generated by LLMs.
+\end{abstract}
+
+
 \input{chapters/01-intro}
 \input{chapters/02-literature-review}
 \input{chapters/03-methodology}
@@ -42,11 +33,19 @@ The primary objective of this thesis is to develop and validate pricing heuristi
 \input{chapters/06-conclusion}
 
 
+\section*{Acknowledgments}
+We thank Eugene Bykovets, PhD (ETH), for his help with the problem formulation.
+This research was supported with Cloud TPUs from Google's TPU Research Cloud (TRC).
+
 \printbibliography
 
 \clearpage
-\onecolumn
 \appendix
+\section{Terminology}
+\begin{description}
+\item[Agent $A$] A non-human actor powered by an LLM.
+\item[Human $H$] An individual human with some job to be done.
+\end{description}
 \input{../build/concatenated_code}
 
 \end{document}
diff --git a/paper/src/preamble.tex b/paper/src/preamble.tex
index 79b2857..c24e7cf 100644
--- a/paper/src/preamble.tex
+++ b/paper/src/preamble.tex
@@ -1,6 +1,25 @@
-% acmart already includes: graphicx, hyperref, booktabs, amsmath, natbib
-% Only load packages not included in acmart
+% Math packages (load before fonts to avoid conflicts)
+\usepackage{amsmath}
+\usepackage{amsthm}
 
+% Define theorem environments
+\newtheorem{theorem}{Theorem}
+\newtheorem{definition}{Definition}
+\newtheorem{lemma}{Lemma}
+\newtheorem{corollary}{Corollary}
+
+% Font and spacing
+\usepackage{newtxtext,newtxmath}
+\usepackage{setspace}
+\doublespacing
+
+% Page geometry
+\usepackage[margin=1in]{geometry}
+
+% Essential packages
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{booktabs}
 \usepackage{csquotes}
 \usepackage{subcaption}
 \usepackage{siunitx}
@@ -8,6 +27,10 @@
 \usepackage{listings}
 \usepackage{xcolor}
 \usepackage[ruled,vlined]{algorithm2e}
+\usepackage{cleveref}
+
+% Configure cleveref for algorithm2e
+\crefname{algocf}{Algorithm}{Algorithms}
 
 \usetikzlibrary{positioning, shapes, arrows.meta, fit, backgrounds}
 \lstset{
diff --git a/sim/rl/environment.py b/sim/rl/environment.py
new file mode 100644
index 0000000..19f9ad4
--- /dev/null
+++ b/sim/rl/environment.py
@@ -0,0 +1,451 @@
+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
+from dataclasses import dataclass
+import pandas as pd
+from typing import Callable, Optional, Dict, Any, List
+
+# "learner": the agent that learns to optimize pricing
+# "agent": part of the environment; it creates the demand signals that the learner processes
+
+@dataclass
+class BusinessLogicConstraints():
+    max_price_adjustment: float = 0.30
+    system_max_price: float = 500.0
+    system_min_price: float = 1.0
+    product_catelogue_size: int = 100
+    episode_length: int = 200
+    sessions_per_step: int = 250
+    agent_share: float = 0.25
+    agent_recon_multiplier: float = 6.0
+    agent_purchase_probability: float = 0.20
+    coi_strength: float = 0.25
+    coi_threshold: float = 4.0
+    coi_sigmoid_temp: float = 1.25
+    base_human_demand: float = 0.08
+    base_agent_demand: float = 0.05
+    human_price_elasticity: float = -1.2
+    agent_price_elasticity: float = -0.6
+    w_agent_loss: float = 1.0
+    w_volatility: float = 5.0
+    w_estimation_error: float = 0.25
+    seed: int = 7
+
+
+def _sigmoid(x: np.ndarray) -> np.ndarray:  # element-wise logistic function 1 / (1 + e^-x)
+    return 0.5 * (1.0 + np.tanh(0.5 * x))  # tanh identity: same function mathematically, but no exp() overflow for x << 0
+
+
+def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
+    # baseline heuristic: flag sessions with many interactions, high velocity and near-zero conversion
+    v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index))  # absent column -> zeros
+    cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index))
+    total = session_df.get("total_interactions", pd.Series(0, index=session_df.index))
+    return (total >= 12) & (v >= 0.20) & (cr <= 0.01)  # boolean Series; True where all three thresholds are met
+
+
+class CommercePlatform:
+    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
+                 constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
+                 use_defense: bool = False):
+        self.product_catelogue_size = product_catelogue_size
+        self.max_price = max_price
+        self.min_price = min_price
+        self.constraints = constraints
+        self.use_defense = use_defense
+        self.agent_detector = agent_detector
+        self.simulation_history: List[Dict[str, Any]] = []
+        self._rng = np.random.default_rng(constraints.seed)
+        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
+        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
+        self._last_interaction_df: pd.DataFrame = pd.DataFrame()
+
+    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
+        # ground truth purchase propensities
+        p = np.clip(prices, self.min_price, self.max_price)
+        pn = p / self.max_price
+        human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
+        agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
+        return {
+            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
+            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
+        }
+
+    def _session_markup_multiplier(self, signal_score: float) -> float:
+        # session-based COI markup based on demand signal expression
+        x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
+        return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])
+
+    def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
+        demand = self.setup_true_demand(base_prices)
+        human_pprob = demand["human_purchase_prob"]
+        agent_pprob = demand["agent_purchase_prob"]
+        events: List[Dict[str, Any]] = []
+        T = self.constraints.sessions_per_step
+        n_agent_sessions = int(round(T * self.constraints.agent_share))
+        n_human_sessions = T - n_agent_sessions
+
+        # human sessions: normal browse with possible purchase
+        for s in range(n_human_sessions):
+            session_id = f"h_{len(events)}_{s}"
+            k = int(self._rng.integers(1, 4))
+            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
+            t = 0.0
+            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
+            signal_score = 0.0
+            purchased_any = False
+
+            for i, pid in enumerate(prod_ids):
+                t += float(inter_times[i])
+                price_shown = float(base_prices[pid])
+                events.append({
+                    "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                    "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                signal_score += 1.0
+
+                if self._rng.random() < 0.35:
+                    t += float(inter_times[i + k])
+                    events.append({
+                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                        "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
+                        "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                    })
+                    signal_score += 2.0
+
+                if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
+                    t += float(inter_times[i + 2 * k])
+                    mult = self._session_markup_multiplier(signal_score)
+                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
+                    events.append({
+                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                        "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
+                        "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
+                    })
+                    purchased_any = True
+
+        # agent sessions: split recon/purchase to circumvent COI
+        n_agent_ids = max(1, n_agent_sessions // 2)
+        for a in range(n_agent_ids):
+            agent_id = f"a_{a}"
+            recon_session_id = f"{agent_id}_recon"
+            t = 0.0
+            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
+            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
+            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
+            recon_signal = 0.0
+
+            for i, pid in enumerate(prod_ids):
+                t += float(inter_times[i])
+                events.append({
+                    "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
+                    "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                recon_signal += 1.0
+
+            # clean purchase session with minimal interactions
+            if self._rng.random() < self.constraints.agent_purchase_probability:
+                purchase_session_id = f"{agent_id}_clean"
+                pid = int(self._rng.integers(0, self.product_catelogue_size))
+                t2 = 0.0
+                clean_signal = 0.0
+                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
+                events.append({
+                    "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
+                    "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                clean_signal += 1.0
+
+                if self._rng.random() < float(agent_pprob[pid]):
+                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
+                    obs_mult = self._session_markup_multiplier(clean_signal)
+                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
+                    oracle_mult = self._session_markup_multiplier(recon_signal)  # oracle links recon->purchase
+                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
+                    events.append({
+                        "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
+                        "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
+                        "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
+                    })
+
+        return pd.DataFrame(events)
+
+    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
+        if interaction_df.empty:
+            return {"mean_sale_price": 0.0, "look_to_book": 0.0}
+        purchases = interaction_df[interaction_df["action"] == "purchase"]
+        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
+        views = float((interaction_df["action"] == "view").sum())
+        buys = float((interaction_df["action"] == "purchase").sum())
+        return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
+
+    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Aggregate the event log into one feature row per session_id (sessions in first-seen order)."""
+        if df.empty:
+            return pd.DataFrame()
+        key = df["session_id"]
+        g = df.groupby("session_id", sort=False)
+        session_duration = g["t"].max() - g["t"].min()
+        total_interactions = g.size()
+        avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
+        # Per-session action counts as grouped boolean sums: no Python call per group, and no
+        # dependence on the `include_groups` keyword that only newer pandas releases accept.
+        counts = {a: df["action"].eq(a).groupby(key, sort=False).sum() for a in ("view", "cart", "purchase")}
+        views, cart_adds, purchases = counts["view"], counts["cart"], counts["purchase"]
+        is_agent = df["actor"].eq("agent").groupby(key, sort=False).any()
+        return pd.DataFrame({
+            "session_duration_sec": session_duration.astype(float),
+            "avg_time_between_events": avg_time_between.astype(float),
+            "total_interactions": total_interactions.astype(int),
+            "interaction_velocity": (total_interactions / (session_duration + 1e-6)).astype(float),
+            "item_views": views.astype(int),
+            "cart_adds": cart_adds.astype(int),
+            "purchases": purchases.astype(int),
+            "conversion_rate": (purchases / (views + 1e-6)).astype(float),
+            "is_agent": is_agent.astype(bool),
+        }).reset_index()
+
+    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
+        # proxy demand from weighted interaction events
+        if interaction_df.empty:
+            return np.zeros(self.product_catelogue_size, dtype=np.float32)
+        df = interaction_df
+        if exclude_sessions is not None:
+            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
+            df = df[~df["session_id"].isin(bad_sessions)]
+        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
+        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
+        prod = df["product_id"].to_numpy(dtype=int)
+        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
+        np.add.at(q_hat, prod, w)
+        return q_hat.astype(np.float32)
+
+    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
+        """Simulate one step of sessions at `prices`; return (and append to history) the step summary dict."""
+        interaction_df = self._simulate_sessions(prices)
+        self._last_interaction_df = interaction_df
+        session_df = self._session_feature_table(interaction_df)
+
+        predicted_agent_sessions = None
+        if (self.use_defense and self.agent_detector is not None and not session_df.empty):
+            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
+
+        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
+        q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
+            if predicted_agent_sessions is not None else q_hat_naive.copy()
+
+        # Per-product purchase counts by actor. np.add.at accumulates repeated product ids, so this
+        # equals a row-by-row tally without the slow DataFrame.iterrows() loop.
+        true_human = np.zeros(self.product_catelogue_size, dtype=float)
+        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
+        if not interaction_df.empty:
+            bought = interaction_df[interaction_df["action"] == "purchase"]
+            pid, by_human = bought["product_id"].to_numpy(dtype=int), (bought["actor"] == "human").to_numpy()
+            np.add.at(true_human, pid[by_human], 1.0)
+            np.add.at(true_agent, pid[~by_human], 1.0)
+
+        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
+        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
+        agent_loss = max(0.0, revenue_oracle - revenue_observed)
+
+        eps = 1e-6
+        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
+        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
+        interaction_features = self.compute_interaction_features(interaction_df)
+
+        summary = {
+            "prices": prices.copy(),
+            "interaction_df": interaction_df,
+            "session_df": session_df,
+            "q_hat_naive": q_hat_naive,
+            "q_hat_defended": q_hat_defended,
+            "true_human_demand": true_human.astype(np.float32),
+            "true_agent_purchases": true_agent.astype(np.float32),
+            "internal_error_naive": internal_error_naive.astype(np.float32),
+            "internal_error_defended": internal_error_def.astype(np.float32),
+            "interaction_features": interaction_features,
+            "revenue_observed": revenue_observed,
+            "revenue_oracle": revenue_oracle,
+            "agent_loss": agent_loss,
+            "predicted_agent_sessions": predicted_agent_sessions,
+        }
+        self.simulation_history.append(summary)
+        return summary
+
+    def get_interaction_data(self) -> "np.ndarray | List[Dict[str, Any]]":  # NOTE(review): the two branches return different types
+        if self._last_interaction_df.empty:
+            return np.array([], dtype=object)  # empty object array when the last interaction frame is empty
+        return self._last_interaction_df.to_dict(orient="records")  # otherwise a list with one dict per row
+
+
+class PHANTOMEnv(gym.Env):
+    metadata = {"render_modes": []}
+
+    def __init__(self, use_defense: bool = False):
+        super().__init__()
+        self.constraints = BusinessLogicConstraints()
+        self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
+                                       high=self.constraints.max_price_adjustment,
+                                       shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
+        self.observation_space = spaces.Dict({
+            "elasticity": spaces.Dict({
+                "price": spaces.Box(
+                    low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
+                    high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
+                    dtype=np.float32),
+                "demand": spaces.Box(
+                    low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+                    high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
+                    dtype=np.float32),
+            })
+        })
+        self.commerce_platform = CommercePlatform(
+            product_catelogue_size=self.constraints.product_catelogue_size,
+            max_price=self.constraints.system_max_price,
+            min_price=self.constraints.system_min_price,
+            constraints=self.constraints,
+            agent_detector=simple_agent_detector,
+            use_defense=use_defense)
+        self._rng = np.random.default_rng(self.constraints.seed)
+        self.t = 0
+        self._prev_prices: Optional[np.ndarray] = None
+        self.state: Dict[str, Any] = {}
+
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
+        super().reset(seed=seed)
+        if seed is not None:
+            self._rng = np.random.default_rng(seed)
+            self.commerce_platform._rng = np.random.default_rng(seed)
+        self.t = 0
+        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
+        self._prev_prices = init_prices.copy()
+        self.state = {
+            "elasticity": {
+                "price": init_prices,
+                "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+            }
+        }
+        return self.state, {}
+
+    def step(self, action: np.ndarray):
+        self.t += 1
+        base_prices = self.state["elasticity"]["price"].astype(np.float32)
+        new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
+                           self.constraints.system_min_price,
+                           self.constraints.system_max_price).astype(np.float32)
+        result = self.commerce_platform.run_pricing_simulation(new_prices)
+
+        if self.commerce_platform.use_defense:
+            demand_est = result["q_hat_defended"]
+            internal_err = result["internal_error_defended"]
+        else:
+            demand_est = result["q_hat_naive"]
+            internal_err = result["internal_error_naive"]
+
+        self.state["elasticity"]["price"] = new_prices
+        self.state["elasticity"]["demand"] = demand_est
+
+        volatility = 0.0 if self._prev_prices is None else \
+            float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
+        self._prev_prices = new_prices.copy()
+
+        revenue_observed = float(result["revenue_observed"])
+        agent_loss = float(result["agent_loss"])
+        err_mean = float(np.mean(internal_err))
+
+        reward = (revenue_observed
+                 - self.constraints.w_agent_loss * agent_loss
+                 - self.constraints.w_volatility * volatility
+                 - self.constraints.w_estimation_error * err_mean)
+
+        terminated = self.t >= self.constraints.episode_length
+        info = {
+            "t": self.t,
+            "revenue_observed": revenue_observed,
+            "revenue_oracle": float(result["revenue_oracle"]),
+            "agent_loss": agent_loss,
+            "ux_volatility": volatility,
+            "mean_internal_error": err_mean,
+            "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
+            "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
+            "true_human_purchases_total": float(np.sum(result["true_human_demand"])),
+            "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
+        }
+        return self.state, float(reward), terminated, False, info
+
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+    from collections import defaultdict
+
+    runs = {}
+    for use_defense in (False, True):
+        env = PHANTOMEnv(use_defense=use_defense)
+        obs, _ = env.reset(seed=42)
+        metrics = defaultdict(list)
+        total_reward = 0.0
+        done = False
+
+        while not done:
+            action = env.action_space.sample()
+            obs, reward, done, _, info = env.step(action)
+            total_reward += reward
+            p_mean = float(np.mean(obs["elasticity"]["price"]))
+            q_mean = float(np.mean(obs["elasticity"]["demand"]))
+            p_std = float(np.std(obs["elasticity"]["price"]))
+
+            metrics['t'].append(info['t'])
+            metrics['price_mean'].append(p_mean)
+            metrics['price_std'].append(p_std)
+            metrics['demand_mean'].append(q_mean)
+            metrics['revenue_observed'].append(info['revenue_observed'])
+            metrics['revenue_oracle'].append(info['revenue_oracle'])
+            metrics['agent_loss'].append(info['agent_loss'])
+            metrics['ux_volatility'].append(info['ux_volatility'])
+            metrics['look_to_book'].append(info['look_to_book'])
+            metrics['reward'].append(reward)
+            metrics['human_purchases'].append(info['true_human_purchases_total'])
+            metrics['agent_purchases'].append(info['true_agent_purchases_total'])
+
+            if info['t'] % 20 == 0 or done:
+                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
+                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
+                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
+                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
+
+        runs[use_defense] = metrics
+        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
+
+    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
+    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
+
+    plot_configs = [
+        ('price_mean', 'Mean Price', 'Price'),
+        ('demand_mean', 'Mean Demand Estimate', 'Demand'),
+        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
+        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
+        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
+        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
+        ('reward', 'Step Reward', 'Reward'),
+        ('human_purchases', 'Human Purchases', 'Count'),
+        ('agent_purchases', 'Agent Purchases', 'Count'),
+    ]
+
+    for idx, (key, title, ylabel) in enumerate(plot_configs):
+        ax = axes[idx // 3, idx % 3]
+        for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
+            m = runs[use_defense]
+            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
+        ax.set_xlabel('Step')
+        ax.set_ylabel(ylabel)
+        ax.set_title(title, fontsize=10, fontweight='bold')
+        ax.legend(loc='best', fontsize=8)
+        ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
+    print("Plot saved to phantom_env_comparison.png")
+    plt.show()