From a9d73ccce50a3c769d103cf417cb5f3d7ca38aca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Alves=20R=C3=B6sel?=
 <60182044+velocitatem@users.noreply.github.com>
Date: Tue, 13 Jan 2026 17:07:29 +0100
Subject: [PATCH 01/35] Paper first fillout (#39)

* initial environment definitions

* high-level definition

* formulating the reward simply

* improved implementation

* tailored docker compose image for secondary tensorboard

* preliminary descriptions and babble

* details on formulation and definition of agent and its loop

* typos one

* more grammar issues

* fluidity improvements and refactors

* more decluttering and denoising

* finalizing introduction review

* some methodology

* somehow this disappeared

* bit more of this and that

* methodology of how we do architecture and online DP

* fix: compilation

* expanding on the taxonomy and economic references

* author notes

* acks + google GCP

* making space w new format nada lit review

* stronger lit review and more sources

* forgot about tables and graphs

* dedupe citations

* adding cloudflare

* fixing env vars

* updating docs with url

* updating embed

* fixing the url

* paper badge

* formalization of rewards and adding definitions

* noisy formulations

* connecting some more dots here

* adding significant weight in prices

* fixing error

* fixing typos and consistency

* extra math formulations and reference to DRO

* fixing diagram of loops

* github mindmap

* fixing error and thinking about the big picture

* enhancing the website

* goals methodology and gitignore

* some more references and theory links

* talking about some wtp

* feature: added wordcounter

* forcing latex builds and fixing the bib #

* refactor: update Cost of Information equations and notation for clarity

* some more math and refactors

* refactor: unify notation and improve clarity in COI equations

* refactor: generalize master function for demand estimation and pricing strategies

* we dont like math but we have to do it :(

* refactor: enhance Cost of Information framework with additional context and illustration

* refactor: enhance literature review and methodology sections with economic theory insights and system architecture details

* aligning format to fit the rubric

* refactoring bibliography

* fix: align

* mdp additionally

* trying different title

* adding balance figure

* agentic divergence, finally

* fix: figure fonts adjusted to match
---
 .github/workflows/latex.yml                 |  48 ++-
 .gitignore                                  |   6 +
 Makefile                                    |  19 +-
 README.md                                   |  84 +++-
 docker-compose.yml                          |  15 +-
 docs/goals/goals.csv                        |  21 +
 docs/index.html                             |  15 +-
 paper/.latexmkrc                            |   4 +-
 paper/concat_code.sh                        |   8 +-
 paper/src/auto/main.el                      |   9 +-
 paper/src/bib/references.bib                | 425 ++++++++++++++++++
 paper/src/chapters/01-intro.tex             |  47 +-
 paper/src/chapters/02-literature-review.tex |  39 +-
 paper/src/chapters/03-methodology.tex       | 275 ++++++++++--
 paper/src/chapters/05-discussion.tex        |  10 +
 paper/src/chapters/06-conclusion.tex        |   2 +-
 paper/src/chapters/balance_figure.tex       |  38 ++
 paper/src/chapters/feature_table.tex        |  65 +++
 paper/src/chapters/loop_figure.tex          | 110 +++++
 paper/src/chapters/mdp_agent.pdf            | Bin 0 -> 10743 bytes
 paper/src/chapters/mdp_human.pdf            | Bin 0 -> 12194 bytes
 paper/src/main.tex                          |  45 +-
 paper/src/preamble.tex                      |  27 +-
 sim/rl/environment.py                       | 451 ++++++++++++++++++++
 24 files changed, 1656 insertions(+), 107 deletions(-)
 create mode 100644 docs/goals/goals.csv
 create mode 100644 paper/src/chapters/balance_figure.tex
 create mode 100644 paper/src/chapters/feature_table.tex
 create mode 100644 paper/src/chapters/loop_figure.tex
 create mode 100644 paper/src/chapters/mdp_agent.pdf
 create mode 100644 paper/src/chapters/mdp_human.pdf
 create mode 100644 sim/rl/environment.py

diff --git a/.github/workflows/latex.yml b/.github/workflows/latex.yml
index 2b40879..a8b5c9f 100644
--- a/.github/workflows/latex.yml
+++ b/.github/workflows/latex.yml
@@ -19,10 +19,56 @@ jobs:
         with:
           root_file: main.tex
           working_directory: paper/src
-          args: -pdf -interaction=nonstopmode -file-line-error -outdir=../build
+          args: -pdf -f -interaction=nonstopmode -file-line-error -outdir=../build
           pre_compile: bash ../concat_code.sh
       - name: Upload PDF
         uses: actions/upload-artifact@v4
         with:
           name: thesis-pdf
           path: paper/build/main.pdf
+
+      - name: Get current date
+        id: date
+        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
+
+      - name: Upload to Cloudflare R2
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
+          AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT }}
+          DATE: ${{ steps.date.outputs.date }}
+          BUCKET_NAME: ${{ secrets.R2_BUCKET_NAME }}
+        run: |
+          pip install boto3
+          python3 << 'EOF'
+          import boto3
+          import os
+
+          s3 = boto3.client('s3',
+              endpoint_url=os.environ['AWS_ENDPOINT_URL'],
+              aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
+              aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY']
+          )
+
+          date = os.environ['DATE']
+          bucket = os.environ['BUCKET_NAME']
+
+          # Archive copy keyed by build date (thesis-YYYY-MM-DD.pdf)
+          dated_filename = f"thesis-{date}.pdf"
+          s3.upload_file(
+              'paper/build/main.pdf',
+              bucket,
+              dated_filename,
+              ExtraArgs={'ContentType': 'application/pdf'}
+          )
+          print(f"Uploaded {dated_filename}")
+
+          # Fixed key that is re-uploaded on every run, so it tracks the newest build
+          s3.upload_file(
+              'paper/build/main.pdf',
+              bucket,
+              'thesis-latest.pdf',
+              ExtraArgs={'ContentType': 'application/pdf'}
+          )
+          print("Uploaded thesis-latest.pdf")
+          EOF
diff --git a/.gitignore b/.gitignore
index 733e405..9db7742 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,12 @@ paper/src/bib/auto
 experiments/airflow/logs/*
 experiments/airflow/logs/scheduler/
 experiments/airflow/logs/dag_processor_manager/
+experiments/collected_data/*
+
+paper/src/auto/*
+lib/
+docs/goals/*.md
+PHANTOM.wiki/
 tests/e2e/node_modules/**
 **/auto/*.el
 *.old
diff --git a/Makefile b/Makefile
index d2d2d7f..0c51bb3 100644
--- a/Makefile
+++ b/Makefile
@@ -22,14 +22,15 @@ $(BUILDDIR):
 pdf.build: $(BUILDDIR)
 	@bash paper/concat_code.sh
 	@cd $(SRCDIR) && \
-	$(LATEXMK) -pdf -jobname=$(JOBNAME) \
+	$(LATEXMK) -pdf -jobname=$(JOBNAME) -f \
 		-interaction=nonstopmode -file-line-error \
+		-r ../.latexmkrc \
 		-outdir=../$(BUILDDIR) $(TEX)
 
 .PHONY: pdf.watch
 pdf.watch: $(BUILDDIR)
 	@cd $(SRCDIR) && \
-	$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) \
+	$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \
 		-interaction=nonstopmode -file-line-error \
 		-r ../.latexmkrc \
 		-outdir=../$(BUILDDIR) $(TEX)
@@ -72,6 +73,18 @@ stats.lines:
 	@find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \
 	\( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l
 
+# Word count of the main chapters (appendix excluded), via texcount.
+.PHONY: wordcount
+wordcount:
+	@echo "Counting words in main text (excluding appendix)..."
+	@texcount -nosub -total -sum -1 \
+		$(SRCDIR)/chapters/01-intro.tex \
+		$(SRCDIR)/chapters/02-literature-review.tex \
+		$(SRCDIR)/chapters/03-methodology.tex \
+		$(SRCDIR)/chapters/04-results.tex \
+		$(SRCDIR)/chapters/05-discussion.tex \
+		$(SRCDIR)/chapters/06-conclusion.tex
+
 .PHONY: pdf clean watch run.webapp test count-lines all
 pdf: pdf.build
 clean: pdf.clean
@@ -79,4 +92,4 @@ watch: pdf.watch
 run.webapp: web.dev
 test: test.backend
 count-lines: stats.lines
-all: pdf.build
+all: pdf.build
\ No newline at end of file
diff --git a/README.md b/README.md
index 1126458..17a8c45 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,92 @@
 ### PHANTOM
 
 [![Build PDF](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml/badge.svg)](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml)
+[![Paper](https://img.shields.io/badge/Paper-PDF-red?logo=adobe-acrobat-reader)](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf)
 [![TPU Research Cloud](https://img.shields.io/badge/TPU%20Research%20Cloud-TRC%20supported-4285F4?logo=googlecloud&logoColor=white)](https://sites.research.google/trc/faq/)
 [![Vercel Deploy](https://deploy-badge.vercel.app/?url=https://phantom-hotel.vercel.app&name=Hotel)](https://phantom-hotel.vercel.app)
 [![Vercel Deploy](https://deploy-badge.vercel.app/?url=https://phantom-airline.vercel.app&name=Airline)](https://phantom-airline.vercel.app)
 
 
 
-
+```mermaid
+mindmap
+  PHANTOM((PHANTOM Project))
+    North Star
+      Study how automated actors change markets 
+      Build an experimentation platform for real-world-like commerce 
+      Two-loop learning system
+        Online observation loop 
+        Offline "defense gym" loop 
+    Core Economic Questions
+      Price Discovery
+        How prices respond to demand signals
+        How signal quality changes with bots/agents
+      Demand & Elasticity
+        Shifts in willingness-to-pay
+        Short-run vs long-run elasticity
+      Market Efficiency & Welfare
+        Consumer surplus vs producer surplus
+        Deadweight loss from frictions/manipulation
+      Price Discrimination & Segmentation
+        Behavioral feature-based segmentation
+        Fairness vs profitability tradeoffs
+      Information Asymmetry
+        Agents amplify search and arbitrage
+        Sellers infer more about buyers; buyers infer more about sellers
+      Strategic Interaction
+        Consumers vs firms vs agents
+        Feedback loops: policy ↔ behavior ↔ price
+      Market Power & Competition
+        Algorithmic pricing as competitive tool
+        Risks: tacit coordination / "algorithmic collusion"
+      Externalities
+        Congestion and attention costs
+        Spillovers: one segment’s behavior affects others’ prices
+    System-Level View
+      Participants
+        Humans
+        Agents (automated buyers/actors) 
+        Firms (pricing decision-makers)
+        Platform (measurement + control layer)
+      Markets Simulated
+        Repeated transactions
+        Limited inventory / capacity constraints (conceptually)
+        Time dynamics (learning over time)
+      Interventions
+        Pricing policies
+        Experiment assignment / randomized exposure
+        Agent behavioral policies (task-driven)
+    Measurement & Causal Inference
+      What is observed
+        Actions (search, click, purchase intent)
+        Context (product attributes, time, exposure)
+        Outcomes (conversion, revenue, churn proxies)
+      Identification strategy
+        A/B tests and randomization
+        Counterfactual baselines
+        Robustness checks (offline replay)
+      Key metrics
+        Revenue / profit proxies
+        Conversion & bounce
+        Price volatility / stability
+        Welfare proxies (e.g., dispersion, access)
+    Risk, Governance, and Ethics
+      Manipulation & Integrity
+        Bot-driven demand distortion
+        Measurement contamination
+      Fairness & Transparency
+        Differential pricing concerns
+        Explainability and auditability
+      Safety Constraints
+        Guardrails on price moves
+        Monitoring for runaway feedback loops
+    Outputs
+      Insights
+        When do agents raise/lower prices via behavior shifts?
+        Which market designs are robust to automation?
+      Defenses
+        Agent-aware pricing policies (robust control)
+        Detection + mitigation strategies (feature-level separability)
+      Platform Value
+        Reusable testbed for market + AI-agent research
+```
diff --git a/docker-compose.yml b/docker-compose.yml
index f572758..f72f415 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,8 +1,17 @@
 services:
-
-  tensorboard:
+  tensorboard-rl:
     image: tensorflow/tensorflow:latest
-    container_name: "PHANTOM-tensorboard"
+    container_name: "PHANTOM-tensorboard-rl"
+    ports:
+      - "6007:6006"
+    volumes:
+      - ./sim/rl/runs:/logs
+    command: tensorboard --logdir=/logs --host=0.0.0.0 --port=6006
+    restart: unless-stopped
+
+  tensorboard-ml:
+    image: tensorflow/tensorflow:latest
+    container_name: "PHANTOM-tensorboard-ml"
     ports:
       - "6006:6006"
     volumes:
diff --git a/docs/goals/goals.csv b/docs/goals/goals.csv
new file mode 100644
index 0000000..b999fc4
--- /dev/null
+++ b/docs/goals/goals.csv
@@ -0,0 +1,21 @@
+store_mode,task_name,task_description,definition_of_done
+airline,The Indecisive Executive (SEA-LAX),"You are traveling SEA to LAX for business. You prefer Business Class for the comfort, but you need to justify the expense to your company. 1) Find the Business Class option and check its price. 2) Compare it against the Economy option on the same route to see how much money you are saving or spending. 3) Spend some time weighing the pros and cons of the ""Flexible"" fare rule vs the standard one. 4) Ultimately, decide that your comfort is worth it and book the Business Class ticket.","Booking for SEA-LAX Business Class is completed."
+airline,The Cross-Country Splurge (LAX-JFK),"You are flying LAX to JFK and want to treat yourself to First Class, but only if it's the right flight. 1) Find the First Class option. 2) Thoroughly check the details (duration, arrival time). 3) Compare it with the Business Class option if available, or just look at other departure times to ensure this is the best schedule. 4) After confirming this is the absolute best option, proceed to book First Class.","Booking for LAX-JFK First Class is completed."
+airline,The Budget Student (DFW-ORD),"You are a broke student flying DFW to ORD. You have a budget of roughly $200. 1) Find the cheapest Economy flight. 2) Before booking, frantically check if there are any other flights or if the ""Premium"" economy is somehow cheaper (it won't be, but you should check). 3) Hesitate for a moment to consider if you should just drive instead. 4) Resign yourself to the flight and book the Economy ticket.","Booking for DFW-ORD Economy Class is completed."
+airline,The Quick Hop Commuter (LAX-SFO),"You need to get from LAX to SFO as fast as possible. Price is secondary to speed. 1) Search for flights and identify the one with the shortest duration (1h 30m). 2) Click into the details to verify the arrival time fits your schedule. 3) Briefly explore if there's a Business Class upgrade available for this short flight. 4) Decide to stick with Economy since it's such a short trip and book it.","Booking for LAX-SFO is completed."
+airline,The Status Chaser (SFO-SEA),"You are trying to earn airline points and need a ""Premium"" class ticket specifically. 1) Search SFO to SEA. 2) Filter or look for the Premium Economy option. 3) Compare the price gap between Premium and Standard Economy. 4) Browse the details to see if the ""Premium"" fare includes better baggage allowance. 5) Conclude it's worth the points and book the Premium seat.","Booking for SFO-SEA Premium Economy is completed."
+airline,The Family Reunion (MIA-ATL),"You are booking for a family of 4 (2 adults, 2 children) flying MIA to ATL. 1) Search for 4 passengers. 2) You prefer Premium, but if the total is too high, you might settle for Economy. 3) Add Premium to your cart, look at the total, and hesitate. 4) Go back and check the Economy price for 4 people. 5) Decide to treat your family and go back to book the Premium option.","Booking for MIA-ATL (Premium) is completed."
+airline,The Red Eye Skeptic (LAX-JFK),"You need to fly LAX to JFK but hate late arrivals. 1) Search for the flight and check the arrival time of the First Class option. 2) It arrives early morning (02:15), which worries you. 3) Spend some time looking for other flight options on different days to see if there's a better schedule. 4) Realize this is the only direct option that works and proceed to book it despite the time.","Booking for LAX-JFK is completed."
+airline,The Refundable Requirement (ATL-DFW),"Your meeting in Dallas might get cancelled, so you strictly need a ""Refundable"" ticket. 1) Search ATL to DFW. 2) Find the First Class option and verify it lists ""Refundable"". 3) Check the Economy option to see if it is also refundable (it might not be). 4) Weigh the cost difference. 5) Choose the First Class Refundable option for peace of mind.","Booking for ATL-DFW First Class is completed."
+airline,The Hub Connector (ORD-MIA),"You are flying ORD to MIA to catch a cruise. You cannot be late. 1) Search for the flight. 2) Verify the ""stops"" is 0 (Direct). 3) Click into details to check the duration. 4) Worry that 3h 30m might be too long in Economy. 5) Look for a Business class option. 6) Decide to save money for the cruise and book Economy.","Booking for ORD-MIA Economy is completed."
+airline,The West Coast Hopper (SEA-LAX Business),"You fly this route often and usually pay around $720. 1) Search SEA to LAX. 2) Find the Business Class ticket. 3) Check if the price is near your usual $720 or if it's surged. 4) If it looks expensive, browse other dates to compare. 5) Return to your original desired date and book the Business Class seat.","Booking for SEA-LAX Business is completed."
+hotel,The Honeymoon Suite (Presidential),"It is your honeymoon. You want the best room available, specifically one with a ""jacuzzi"". 1) Search for a room for 2 people. 2) Identify the ""Presidential Suite"". 3) Click details to confirm the amenities include a jacuzzi. 4) Browse the ""Executive Suite"" just to see what you are upgrading from. 5) Go back to the Presidential Suite, confirm it's the one you want, and book it.","Booking for the Presidential Suite is completed."
+hotel,The Digital Nomad (Executive),"You are working remotely and strictly need a ""workspace"". 1) Search for a room. 2) Check the ""Executive Suite"" details for a workspace. 3) Check the ""Deluxe Room"" to see if it also has a workspace and is cheaper. 4) Compare the images (if available) or amenity lists of both. 5) Decide the Executive Suite looks more comfortable for a week of work and book it.","Booking for the Executive Suite is completed."
+hotel,The Safety First (Superior),"You are traveling with valuables and need a ""safe"" in the room. 1) Search for a room. 2) Look at the ""Standard Room"" amenities. Does it have a safe? 3) Look at the ""Superior Room"". Verify it has a safe. 4) Compare the price difference. Is safety worth the extra cost? 5) Decide it is, and book the Superior Room.","Booking for the Superior Room is completed."
+hotel,The Bachelor Party (Max Occupancy),"You are booking for 4 guys. You want everyone in one room if possible. 1) Search for 4 adults. 2) Find the room that fits 4 people (Presidential). 3) It looks expensive. Go back and search for 2 adults to see the price of a ""Standard Room"". 4) Calculate if booking two Standard Rooms is cheaper than one Presidential. 5) Decide it's too much hassle to manage two bookings and book the Presidential Suite.","Booking for the Presidential Suite is completed."
+hotel,The Budget Refundable (Junior),"You want a cheap room but your dates might change, so it MUST be refundable. 1) Search for a room. 2) Sort by price or find the cheapest options. 3) Check the ""Standard"" and ""Superior"" rooms. Notice they are likely Non-Refundable. 4) Find the ""Junior Suite"" which is Refundable. 5) Grumble about the price difference but book the Junior Suite because you need the flexibility.","Booking for the Junior Suite is completed."
+hotel,The View Hunter (Executive),"You want a room with a ""city_view"" or balcony. 1) Search for a room. 2) Check the amenities of the ""Deluxe Room"". 3) Check the amenities of the ""Executive Suite"". 4) Compare the prices. 5) Decide to treat yourself to the Executive Suite for the better view/balcony and book it.","Booking for the Executive Suite is completed."
+hotel,The Just-A-Bed (Standard),"You just need a place to crash. Lowest price wins. 1) Search for a room. 2) Identify the absolute cheapest option (Standard Room). 3) Click details just to make sure it has ""wifi"". 4) Briefly glance at the ""Superior Room"" to see if the upgrade is <$10. 5) If not, go back and book the Standard Room immediately.","Booking for the Standard Room is completed."
+hotel,The Family Vacation (Deluxe),"You are traveling with a child. You need a room that isn't too cramped but not a suite. 1) Search for 2 adults, 1 child. 2) Look at the ""Deluxe Room"". 3) Check the amenities for ""coffee_maker"" (parents need coffee). 4) Compare it with the ""Junior Suite"". 5) Decide the Deluxe Room is sufficient value and book it.","Booking for the Deluxe Room is completed."
+hotel,The Long Stay (Junior),"You are staying for 7 nights. You want something nicer than a standard room but affordable. 1) Search for a room. 2) Look at the ""Junior Suite"". 3) Check the amenities for a ""mini_fridge"" or similar. 4) Compare the total cost for 7 nights against your budget. 5) Hesitate and look at the ""Standard Room"" price. 6) Decide the extra space of the Junior Suite is worth it for a long stay and book it.","Booking for the Junior Suite is completed."
+hotel,The Last Minute Panic (Superior),"It's late and you need a room for tonight. 1) Search for a room for 1 person. 2) You recognize the ""Superior Room"" brand. 3) Click it. 4) Quickly verify check-in times or details. 5) Don't overthink it—book the Superior Room as fast as possible.","Booking for the Superior Room is completed."
diff --git a/docs/index.html b/docs/index.html
index f190154..a3f587b 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -47,7 +47,7 @@
   <meta name="citation_author" content="Rösel, Daniel">
   <meta name="citation_publication_date" content="2025">
   <meta name="citation_conference_title" content="IE University Bachelor's Thesis">
-  <meta name="citation_pdf_url" content="TODO">
+  <meta name="citation_pdf_url" content="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf">
 
   <!-- Additional SEO -->
   <meta name="theme-color" content="#2563eb">
@@ -233,14 +233,13 @@
 
                   <div class="is-size-5 publication-authors">
                     <span class="author-block">IE University<br>Bachelor's Thesis 2025</span>
-                    <span class="eql-cntrb"><small><br>Advisor: <a href="SECOND AUTHOR PERSONAL LINK" target="_blank">Alberto Martín Izquierdo</a></small></span>
+                    <span class="eql-cntrb"><small><br>Advisor: Alberto Martín Izquierdo</small></span>
                   </div>
 
                   <div class="column has-text-centered">
                     <div class="publication-links">
-                         <!-- TODO: Update with your arXiv paper ID -->
                       <span class="link-block">
-                        <a href="https://arxiv.org/pdf/<ARXIV PAPER ID>.pdf" target="_blank"
+                        <a href="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf" target="_blank"
                         class="external-link button is-normal is-rounded is-dark">
                         <span class="icon">
                           <i class="fas fa-file-pdf"></i>
@@ -315,7 +314,10 @@
         <h2 class="title is-3">Abstract</h2>
         <div class="content has-text-justified">
           <p>
-            The primary objective of this thesis is to develop and validate pricing heuristics that protect e-commerce platforms from systematic exploitation by Large Language Model (LLM) agents within dynamic pricing environments. As AI agents increasingly mediate consumer transactions, they enable users to circumvent the Cost of Information (the price premium accumulated through demand signal expression) by conducting reconnaissance in isolated sessions before executing purchases through clean sessions at base prices. This research will make an anticipatory contribution by adapting recommendation system methodologies to distinguish between genuine human browsing behaviour and agent-orchestrated information gathering, thereby enabling pricing systems to maintain margin integrity without degrading the user experience for legitimate customers or getting rid of leads generated by LLMs.
+            This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model to prove separability as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned separability into existing dynamic pricing machine learning loops, and establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
+          </p>
+          <p>
+            This work develops behavioral signature models using recommendation system techniques to profile session-level interaction, temporal engagement, and cross-session correlation. The AI Agent market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030, raising the question of how these systems should be designed for future robustness and how to maintain a competitive edge in the analytical components of e-commerce platforms.
           </p>
         </div>
       </div>
@@ -433,8 +435,7 @@
     <div class="container">
       <h2 class="title">Poster</h2>
 
-      <!-- TODO: Replace with your poster PDF -->
-      <iframe  src="static/pdfs/sample.pdf" width="100%" height="550">
+      <iframe  src="https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf" width="100%" height="550">
           </iframe>
 
       </div>
diff --git a/paper/.latexmkrc b/paper/.latexmkrc
index d614114..38c50d1 100644
--- a/paper/.latexmkrc
+++ b/paper/.latexmkrc
@@ -1,8 +1,6 @@
 $pdf_mode = 1;
 $pdflatex = 'pdflatex -synctex=1 -interaction=nonstopmode -file-line-error %O %S';
-$aux_dir = 'build';
-$out_dir = 'build';
-$use_biber = 0;                        # force bibtex
+$bibtex_use = 2;                       # run bibtex when needed
 $bibtex   = 'bibtex %O %B';
 $pdf_previewer = 'zathura %O %S';
 $clean_ext = 'synctex.gz bbl bcf run.xml fls fdb_latexmk glg glo gls ist blg lof lot out toc';
diff --git a/paper/concat_code.sh b/paper/concat_code.sh
index 3ff905d..7de4bb3 100755
--- a/paper/concat_code.sh
+++ b/paper/concat_code.sh
@@ -43,22 +43,22 @@ EOF
 echo "Concatenating code from source directories..."
 
 # Backend
-find "$PROJECT_ROOT/backend" -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/backend" -type d \( -name ".venv" -o -name "__pycache__" -o -name "*.egg-info" -o -name "node_modules" -o -name ".pytest_cache" \) -prune -o -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) ! -name "*.pyc" ! -name "*.pyo" -print | sort | while read -r file; do
     add_file "$file"
 done
 
 # Experiments
-find "$PROJECT_ROOT/experiments" -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/experiments" -type d \( -name ".venv" -o -name "__pycache__" -o -name "*.egg-info" -o -name "node_modules" -o -name ".pytest_cache" -o -name ".ipynb_checkpoints" \) -prune -o -type f \( -name "*.py" -o -name "*.js" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) ! -name "*.pyc" ! -name "*.pyo" -print | sort | while read -r file; do
     add_file "$file"
 done
 
 # Docker
-find "$PROJECT_ROOT/docker" -type f \( -name "*.py" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" -o -name "Dockerfile*" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/docker" -type d \( -name ".venv" -o -name "__pycache__" -o -name "node_modules" \) -prune -o -type f \( -name "*.py" -o -name "*.sh" -o -name "*.yml" -o -name "*.yaml" -o -name "Dockerfile*" \) ! -name "*.pyc" ! -name "*.pyo" -print | sort | while read -r file; do
     add_file "$file"
 done
 
 # Web/src
-find "$PROJECT_ROOT/web/src" -type f \( -name "*.js" -o -name "*.jsx" -o -name "*.ts" -o -name "*.tsx" \) | sort | while read -r file; do
+find "$PROJECT_ROOT/web/src" -type d \( -name "node_modules" -o -name ".next" -o -name "dist" -o -name "build" \) -prune -o -type f \( -name "*.js" -o -name "*.jsx" -o -name "*.ts" -o -name "*.tsx" \) -print | sort | while read -r file; do
     add_file "$file"
 done
 
diff --git a/paper/src/auto/main.el b/paper/src/auto/main.el
index 86386e4..6738458 100644
--- a/paper/src/auto/main.el
+++ b/paper/src/auto/main.el
@@ -6,7 +6,7 @@
    (setq TeX-command-extra-options
          "-file-line-error -interaction=nonstopmode")
    (TeX-add-to-alist 'LaTeX-provided-class-options
-                     '(("report" "12pt") ("article" "12pt") ("acmart" "sigconf" "nonacm" "natbib=false")))
+                     '(("report" "12pt") ("acmart" "sigconf" "nonacm" "natbib=false" "manuscript") ("article" "12pt" "letterpaper")))
    (TeX-run-style-hooks
     "latex2e"
     "preamble"
@@ -17,8 +17,7 @@
     "chapters/05-discussion"
     "chapters/06-conclusion"
     "../build/concatenated_code"
-    "acmart"
-    "acmart10")
-   (TeX-add-symbols
-    '("footnotetextcopyrightpermission" 1)))
+    "article"
+    "art12"))
  :latex)
+
diff --git a/paper/src/bib/references.bib b/paper/src/bib/references.bib
index e69de29..1130453 100644
--- a/paper/src/bib/references.bib
+++ b/paper/src/bib/references.bib
@@ -0,0 +1,425 @@
+
+@article{arnoud_v_den_boer_dynamic_2015,
+	title = {Dynamic pricing and learning: {Historical} origins, current research, and new directions},
+	volume = {20},
+	url = {https://www.sciencedirect.com/science/article/pii/S1876735415000021},
+	doi = {10.1016/j.sorms.2015.03.001},
+	number = {1},
+	journal = {Surveys in Operations Research and Management Science},
+	author = {den Boer, Arnoud V.},
+	month = jun,
+	year = {2015},
+	pages = {1--18},
+	file = {PDF:/home/velocitatem/Zotero/storage/NUAGDYER/memo2025.pdf:application/pdf},
+}
+
+@article{iliou_detection_2021,
+	title = {Detection of {Advanced} {Web} {Bots} by {Combining} {Web} {Logs} with {Mouse} {Behavioural} {Biometrics}},
+	volume = {2},
+	url = {https://dl.acm.org/doi/10.1145/3447815},
+	doi = {10.1145/3447815},
+	number = {3},
+	journal = {Digital Threats: Research and Practice},
+	author = {Iliou, Christos and Kostoulas, Theodoros and Tsikrika, Theodora and Katos, Vasilis and Vrochidis, Stefanos and Kompatsiaris, Ioannis},
+	year = {2021},
+	pages = {1--26},
+	file = {PDF:/home/velocitatem/Zotero/storage/Q7J5EBEJ/3447815.pdf:application/pdf},
+}
+
+@phdthesis{salassa_politecnico_nodate,
+	title = {{Algorithmic} {Pricing} in the digital age: {Ethical} considerations on its economic and social implications, and an analysis of possible solutions to overcome its critical issues},
+	abstract = {Algorithmic pricing is an emerging business practice that uses computational algorithms to determine
+the prices of products and services based on a number of dynamic factors. The aim of this thesis is to
+draw attention to the existence of these business practices, and the ethical and social implications that
+derive from them, and then focus on what could be effective solutions to increase the well-being of
+the community.
+In Chapter 2 of the thesis, a general introduction to the topic will be made, starting from its history
+and its evolution over the years; Chapter 3 will examine the different types of pricing algorithms.
+Subsequently, in Chapter 4 we will analyze the sectors in which they are most applicable, and the
+relative advantages and disadvantages they bring with them, with a critical analysis of the trade-offs
+generated. The effect of algorithmic pricing on competition will be studied, considering how the
+ability of algorithms to adapt quickly to market conditions can foster anti-competitive practices, such
+as price discrimination. Later, in Chapter 5, we will look at the issue of price transparency and how
+the opacity of algorithms can make it difficult for consumers to understand the pricing process and
+assess whether they are receiving fair treatment.
+To address these ethical issues, several possible solutions will be brought to light, described in
+Chapter 6, which will focus on the role of the government, as a regulatory, of the end consumer, who
+must be encouraged to educate and inform himself about the use of these practices, and of the
+company, as responsible for making its customers aware and acting in compliance with government
+laws, for fair and non-discriminatory use.},
+	urldate = {2025-11-12},
+	school = {Politecnico di Torino},
+	author = {Salassa, Fabio and Pautassi, Paolo},
+	file = {PDF:/home/velocitatem/Zotero/storage/L95WYQ8B/m-api-06aad998-d926-0d59-5593-82fdce5a678b.pdf:application/pdf},
+}
+
+@inproceedings{mueller_low-rank_2019,
+	title = {Low-{Rank} {Bandit} {Methods} for {High}-{Dimensional} {Dynamic} {Pricing}},
+	booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 32 ({NeurIPS} 2019)},
+	author = {Mueller, Jonas W and Syrgkanis, Vasilis and Taddy, Matt},
+	year = {2019},
+	pages = {15442--15452},
+	file = {PDF:/home/velocitatem/Zotero/storage/IZD3C5SR/m-api-26f6207c-cc89-4aed-29b6-34629f18fe9b.pdf:application/pdf},
+}
+
+@article{shahidi_coasean_nodate,
+	title = {The {Coasean} {Singularity}? {Demand}, {Supply}, and {Market} {Design} with {AI} {Agents}},
+	abstract = {AI agents—autonomous systems that perceive, reason, and act on behalf of human principals—are poised to transform digital markets by dramatically reducing transaction costs. This chapter evaluates the economic implications of this transition, adopting a consumer-oriented view of agents as market participants that can search, negotiate, and transact directly. From the demand side, agent adoption reflects derived demand: users trade off decision quality against effort reduction, with outcomes mediated by agent capability and task context. On the supply side, firms will design, integrate, and monetize agents, with outcomes hinging on whether agents operate within or across platforms. At the market level, agents create efficiency gains from lower search, communication, and contracting costs, but also introduce frictions such as congestion and price obfuscation. By lowering the costs of preference elicitation, contract enforcement, and identity verification, agents expand the feasible set of market designs but also raise novel regulatory challenges. While the net welfare effects remain an empirical question, the rapid onset of AI-mediated transactions presents a unique opportunity for economic research to inform real-world policy and market design.},
+	language = {en},
+	author = {Shahidi, Peyman and Rusak, Gili and Manning, Benjamin S and Fradkin, Andrey and Horton, John J},
+	file = {PDF:/home/velocitatem/Zotero/storage/TQCAPJDP/Shahidi et al. - The Coasean Singularity Demand, Supply, and Market Design with AI Agents.pdf:application/pdf},
+}
+
+@misc{byrnes_intro_2025,
+	title = {Intro to {Brain}-{Like}-{AGI} {Safety}},
+	url = {https://osf.io/fe36n_v1},
+	doi = {10.31219/osf.io/fe36n_v1},
+	abstract = {Suppose we someday build an Artificial General Intelligence (AGI) algorithm using similar principles of learning and cognition as the human brain. How would we use such an algorithm safely? I argue that this is an open technical problem, and my goal is to bring readers with no prior knowledge all the way up to the front-line of unsolved problems. Chapter 1 has background and motivation; Chapters 2-7 are on neuroscience, arguing for a picture of the brain that combines large-scale learning algorithms (e.g. in the cortex) and specific evolved reflexes (e.g. in the hypothalamus and brainstem); and Chapters 8-15 apply those neuroscience ideas to AGI safety. A major theme is the idea that the brain has something like a reinforcement learning reward function, which says that pain is bad, eating-when-hungry is good, etc. I argue that this reward function is centered around the hypothalamus and brainstem, and that all human desires—even "higher" desires for things like compassion and justice—come directly or indirectly from that innate reward function. If future programmers build brain-like AGI, they will likewise have a reward function slot in their source code, in which they can put whatever they want. If they put the wrong thing, the resulting AGI will wind up callously indifferent to human welfare. How might they avoid that? That's an open technical problem, but I will review some ideas and research directions.},
+	language = {en},
+	urldate = {2025-12-31},
+	publisher = {Open Science Framework},
+	author = {Byrnes, Steven J.},
+	month = mar,
+	year = {2025},
+	file = {PDF:/home/velocitatem/Zotero/storage/ZLJQ4DQ9/Byrnes - 2025 - Intro to Brain-Like-AGI Safety.pdf:application/pdf},
+}
+
+@article{shannon_mathematical_nodate,
+	title = {A {Mathematical} {Theory} of {Communication}},
+	language = {en},
+	author = {Shannon, C E},
+	file = {PDF:/home/velocitatem/Zotero/storage/FJRFRWK2/Shannon - A Mathematical Theory of Communication.pdf:application/pdf},
+}
+
+@misc{noauthor_order_stats_nodate,
+	title = {order\_stats},
+	file = {PDF:/home/velocitatem/Zotero/storage/D3QRGY9Z/order_stats.pdf:application/pdf},
+}
+
+@article{devine_nonlinear_nodate,
+	title = {Nonlinear {Pricing} with {Costly} {Information} {Acquisition}},
+	abstract = {This paper examines a nonlinear pricing model where the firm can choose to acquire costly information prior to offering contract menus to consumers; such as paying a consultant or investing in machine learning technologies. Information provides the firm with a signal about consumers' types, whose accuracy increases as the firm acquires larger amounts of information. We show that the firm chooses to acquire information, only if it can purchase a sufficient amount that could alter its initial prior beliefs. Relative to standard settings where firms cannot acquire information, we identify how information acquisition changes optimal contract offers, equilibrium profits, information rents, and welfare. A better-informed firm increases its expected profits, but it can also increase expected utility when the cost of information is intermediate. Our results recommend balanced online privacy laws.},
+	language = {en},
+	author = {Devine, Brett R and Munoz-Garcia, Felix},
+	file = {PDF:/home/velocitatem/Zotero/storage/GQ28KVBF/Devine and Munoz-Garcia - Nonlinear Pricing with Costly Information Acquisition.pdf:application/pdf},
+}
+
+@misc{wang_learning_2025,
+	title = {Learning {Optimal} {Distributionally} {Robust} {Stochastic} {Control} in {Continuous} {State} {Spaces}},
+	url = {http://arxiv.org/abs/2406.11281},
+	doi = {10.48550/arXiv.2406.11281},
+	abstract = {We study data-driven learning of robust stochastic control for infinite-horizon systems with potentially continuous state and action spaces. In many managerial settings–supply chains, finance, manufacturing, services, and dynamic games–the state-transition mechanism is determined by system design, while available data capture the distributional properties of the stochastic inputs from the environment. For modeling and computational tractability, a decision maker often adopts a Markov control model with i.i.d. environment inputs, which can render learned policies fragile to internal dependence or external perturbations. We introduce a distributionally robust stochastic control paradigm that promotes policy reliability by introducing adaptive adversarial perturbations to the environment input, while preserving the modeling, statistical, and computational tractability of the Markovian formulation. From a modeling perspective, we examine two adversary models–current-action-aware and current-action-unaware–leading to distinct dynamic behaviors and robust optimal policies. From a statistical learning perspective, we characterize optimal finite-sample minimax rates for uniform learning of the robust value function across a continuum of states under ambiguity sets defined by the $f_k$-divergence and Wasserstein distance. To efficiently compute the optimal robust policies, we further propose algorithms inspired by deep reinforcement learning methodologies. Finally, we demonstrate the applicability of the framework to real managerial problems.},
+	language = {en},
+	urldate = {2025-12-29},
+	publisher = {arXiv},
+	author = {Wang, Shengbo and Meng, Jason and Si, Nian and Blanchet, Jose and Zhou, Zhengyuan},
+	month = nov,
+	year = {2025},
+	note = {arXiv:2406.11281 [stat]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
+	file = {PDF:/home/velocitatem/Zotero/storage/RQ8XDSSG/Wang et al. - 2025 - Learning Optimal Distributionally Robust Stochastic Control in Continuous State Spaces.pdf:application/pdf},
+}
+
+@misc{ie_recsim_2019,
+	title = {{RecSim}: {A} {Configurable} {Simulation} {Platform} for {Recommender} {Systems}},
+	shorttitle = {{RecSim}},
+	url = {http://arxiv.org/abs/1909.04847},
+	doi = {10.48550/arXiv.1909.04847},
+	abstract = {We propose RecSim, a configurable platform for authoring simulation environments for recommender systems (RSs) that naturally supports sequential interaction with users. RecSim allows the creation of new environments that reflect particular aspects of user behavior and item structure at a level of abstraction well-suited to pushing the limits of current reinforcement learning (RL) and RS techniques in sequential interactive recommendation problems. Environments can be easily configured that vary assumptions about: user preferences and item familiarity; user latent state and its dynamics; and choice models and other user response behavior. We outline how RecSim offers value to RL and RS researchers and practitioners, and how it can serve as a vehicle for academic-industrial collaboration.},
+	urldate = {2025-12-29},
+	publisher = {arXiv},
+	author = {Ie, Eugene and Hsu, Chih-wei and Mladenov, Martin and Jain, Vihan and Narvekar, Sanmit and Wang, Jing and Wu, Rui and Boutilier, Craig},
+	month = sep,
+	year = {2019},
+	note = {arXiv:1909.04847 [cs]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Human-Computer Interaction, Computer Science - Information Retrieval},
+	file = {Preprint PDF:/home/velocitatem/Zotero/storage/CJJI2VQF/Ie et al. - 2019 - RecSim A Configurable Simulation Platform for Recommender Systems.pdf:application/pdf;Snapshot:/home/velocitatem/Zotero/storage/8XJKJTHE/1909.html:text/html},
+}
+
+@misc{kuhn_wasserstein_2024,
+	title = {Wasserstein {Distributionally} {Robust} {Optimization}: {Theory} and {Applications} in {Machine} {Learning}},
+	shorttitle = {Wasserstein {Distributionally} {Robust} {Optimization}},
+	url = {http://arxiv.org/abs/1908.08729},
+	doi = {10.48550/arXiv.1908.08729},
+	abstract = {Many decision problems in science, engineering and economics are affected by uncertain parameters whose distribution is only indirectly observable through samples. The goal of data-driven decision-making is to learn a decision from finitely many training samples that will perform well on unseen test samples. This learning task is difficult even if all training and test samples are drawn from the same distribution—especially if the dimension of the uncertainty is large relative to the training sample size. Wasserstein distributionally robust optimization seeks data-driven decisions that perform well under the most adverse distribution within a certain Wasserstein distance from a nominal distribution constructed from the training samples. In this tutorial we will argue that this approach has many conceptual and computational benefits. Most prominently, the optimal decisions can often be computed by solving tractable convex optimization problems, and they enjoy rigorous out-of-sample and asymptotic consistency guarantees. We will also show that Wasserstein distributionally robust optimization has interesting ramifications for statistical learning and motivates new approaches for fundamental learning tasks such as classification, regression, maximum likelihood estimation or minimum mean square error estimation, among others.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Kuhn, Daniel and Esfahani, Peyman Mohajerin and Nguyen, Viet Anh and Shafieezadeh-Abadeh, Soroosh},
+	month = nov,
+	year = {2024},
+	note = {arXiv:1908.08729 [stat]},
+	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Mathematics - Optimization and Control},
+	file = {PDF:/home/velocitatem/Zotero/storage/FAWJEK6J/Kuhn et al. - 2024 - Wasserstein Distributionally Robust Optimization Theory and Applications in Machine Learning.pdf:application/pdf},
+}
+
+@misc{arunachaleswaran_learning_2025,
+	title = {Learning to {Play} {Against} {Unknown} {Opponents}},
+	url = {http://arxiv.org/abs/2412.18297},
+	doi = {10.48550/arXiv.2412.18297},
+	abstract = {We consider the problem of a learning agent who has to repeatedly play a general sum game against a strategic opponent who acts to maximize their own payoff by optimally responding against the learner’s algorithm. The learning agent knows their own payoff function, but is uncertain about the payoff of their opponent (knowing only that it is drawn from some distribution D). What learning algorithm should the agent run in order to maximize their own total utility, either in expectation or in the worst-case over D? When the learning algorithm is constrained to be a no-regret algorithm, we demonstrate how to efficiently construct an optimal learning algorithm (asymptotically achieving the optimal utility) in polynomial time for both the in-expectation and worst-case problems, independent of any other assumptions. When the learning algorithm is not constrained to no-regret, we show how to construct an ε-optimal learning algorithm (obtaining average utility within ε of the optimal utility) for both the in-expectation and worst-case problems in time polynomial in the size of the input and 1/ε, when either the size of the game or the support of D is constant. Finally, for the special case of the maximin objective, where the learner wishes to maximize their minimum payoff over all possible optimizer types, we construct a learner algorithm that runs in polynomial time in each step and guarantees convergence to the optimal learner payoff. All of these results make use of recently developed machinery that converts the analysis of learning algorithms to the study of the class of corresponding geometric objects known as menus.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Arunachaleswaran, Eshwar Ram and Collina, Natalie and Schneider, Jon},
+	month = feb,
+	year = {2025},
+	note = {arXiv:2412.18297 [cs]},
+	keywords = {Computer Science - Machine Learning, Computer Science - Computer Science and Game Theory},
+	file = {PDF:/home/velocitatem/Zotero/storage/M6V9LLCS/Arunachaleswaran et al. - 2025 - Learning to Play Against Unknown Opponents.pdf:application/pdf},
+}
+
+@misc{li_distributionally_2025,
+	title = {Distributionally {Robust} {Optimization} with {Adversarial} {Data} {Contamination}},
+	url = {http://arxiv.org/abs/2507.10718},
+	doi = {10.48550/arXiv.2507.10718},
+	abstract = {Distributionally Robust Optimization (DRO) provides a framework for decision-making under distributional uncertainty, yet its effectiveness can be compromised by outliers in the training data. This paper introduces a principled approach to simultaneously address both challenges. We focus on optimizing Wasserstein-1 DRO objectives for generalized linear models with convex Lipschitz loss functions, where an $\epsilon$-fraction of the training data is adversarially corrupted. Our primary contribution lies in a novel modeling framework that integrates robustness against training data contamination with robustness against distributional shifts, alongside an efficient algorithm inspired by robust statistics to solve the resulting optimization problem. We prove that our method achieves an estimation error of $O(\sqrt{\epsilon})$ for the true DRO objective value using only the contaminated data under the bounded covariance assumption. This work establishes the first rigorous guarantees, supported by efficient computation, for learning under the dual challenges of data contamination and distributional shifts.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Li, Shuyao and Diakonikolas, Ilias and Diakonikolas, Jelena},
+	month = nov,
+	year = {2025},
+	note = {arXiv:2507.10718 [cs]},
+	keywords = {Computer Science - Machine Learning, Mathematics - Optimization and Control, Computer Science - Data Structures and Algorithms},
+	file = {PDF:/home/velocitatem/Zotero/storage/H6AXDTLX/Li et al. - 2025 - Distributionally Robust Optimization with Adversarial Data Contamination.pdf:application/pdf},
+}
+
+@misc{karten_llm_2025,
+	title = {{LLM} {Economist}: {Large} {Population} {Models} and {Mechanism} {Design} in {Multi}-{Agent} {Generative} {Simulacra}},
+	shorttitle = {{LLM} {Economist}},
+	url = {http://arxiv.org/abs/2507.15815},
+	doi = {10.48550/arXiv.2507.15815},
+	abstract = {We present the LLM Economist, a novel framework that uses agent-based modeling to design and assess economic policies in strategic environments with hierarchical decision-making. At the lower level, bounded rational worker agents—instantiated as persona-conditioned prompts sampled from U.S. Census-calibrated income and demographic statistics—choose labor supply to maximize text-based utility functions learned in-context. At the upper level, a planner agent employs in-context reinforcement learning to propose piecewise-linear marginal tax schedules anchored to the current U.S. federal brackets. This construction endows economic simulacra with three capabilities requisite for credible fiscal experimentation: (i) optimization of heterogeneous utilities, (ii) principled generation of large, demographically realistic agent populations, and (iii) mechanism design—the ultimate nudging problem—expressed entirely in natural language. Experiments with populations of up to one hundred interacting agents show that the planner converges near Stackelberg equilibria that improve aggregate social welfare relative to Saez solutions, while a periodic, persona-level voting procedure furthers these gains under decentralized governance. These results demonstrate that large language model-based agents can jointly model, simulate, and govern complex economic systems, providing a tractable test bed for policy evaluation at the societal scale to help build better civilizations.},
+	language = {en},
+	urldate = {2025-12-27},
+	publisher = {arXiv},
+	author = {Karten, Seth and Li, Wenzhe and Ding, Zihan and Kleiner, Samuel and Bai, Yu and Jin, Chi},
+	month = jul,
+	year = {2025},
+	note = {arXiv:2507.15815 [cs]},
+	keywords = {Computer Science - Machine Learning, Computer Science - Multiagent Systems},
+	file = {PDF:/home/velocitatem/Zotero/storage/U7A5Q78V/Karten et al. - 2025 - LLM Economist Large Population Models and Mechanism Design in Multi-Agent Generative Simulacra.pdf:application/pdf},
+}
+
+@techreport{mullapudi_reinforcement_nodate,
+	title = {A {Reinforcement} {Learning} {Approach} to {Dynamic} {Pricing}},
+	abstract = {Dynamic pricing represents a critical strategic challenge in modern e-commerce, where firms must navigate fluctuating demand, inventory constraints, and aggressive competitor actions. Traditional static and heuristic-based pricing models often fail to capture the complex, non-linear dynamics of competitive digital markets, leading to suboptimal profitability. This paper proposes a model-free reinforcement learning (RL) framework to address this challenge. Specifically, we design, implement, and evaluate a Q-learning agent capable of learning an optimal, state-dependent pricing policy. The agent is trained and evaluated within a simulated market environment constructed from the publicly available "Retail Price Optimization" dataset from Kaggle, which provides a rich feature set including historical sales, product characteristics, seasonality, and, crucially, competitor pricing data. The problem is formulated as a Markov Decision Process (MDP), where the agent's state incorporates its price position relative to competitors, competitor price trends, and seasonal factors. The agent's performance is benchmarked against three baseline strategies: static pricing, a reactive "follow-the-leader" heuristic, and random pricing. The results demonstrate that the Q-learning agent achieves a substantial increase in total cumulative profit over the evaluation period, outperforming all baselines by learning a nuanced policy that strategically balances price adjustments in response to market conditions. This work provides a practical and reproducible blueprint for applying reinforcement learning to optimize pricing decisions in a simulated yet realistic competitive retail environment, highlighting the potential of RL to automate complex strategic decision-making.},
+	author = {Mullapudi, Pavan},
+	note = {Publication Title: International Journal on Science and Technology (IJSAT) IJSAT25049558
+Volume: 16
+Issue: 4},
+	keywords = {Index Terms: Dynamic Pricing, Markov Decision Process, Price Optimization, Q-Learning, Reinforcement Learning, Retail Analytics},
+	file = {PDF:/home/velocitatem/Zotero/storage/G95TBLF7/9558.pdf:application/pdf},
+}
+
+@techreport{roughgarden_cs364a_2013,
+	title = {{CS364A}: {Algorithmic} {Game} {Theory} {Lecture} \#5: {Revenue}-{Maximizing} {Auctions}},
+	author = {Roughgarden, Tim},
+	year = {2013},
+	file = {PDF:/home/velocitatem/Zotero/storage/C39VM7N9/l5.pdf:application/pdf},
+}
+
+@techreport{kuhn_distributionally_2025,
+	title = {Distributionally {Robust} {Optimization}},
+	abstract = {Distributionally robust optimization (DRO) studies decision problems under uncertainty where the probability distribution governing the uncertain problem parameters is itself uncertain. A key component of any DRO model is its ambiguity set, that is, a family of probability distributions consistent with any available structural or statistical information. DRO seeks decisions that perform best under the worst distribution in the ambiguity set. This worst case criterion is supported by findings in psychology and neuroscience, which indicate that many decision-makers have a low tolerance for distributional ambiguity. DRO is rooted in statistics, operations research and control theory, and recent research has uncovered its deep connections to regularization techniques and adversarial training in machine learning. This survey presents the key findings of the field in a unified and self-contained manner.},
+	author = {Kuhn, Daniel and Shafiee, Soroosh and Wiesemann, Wolfram},
+	year = {2025},
+	note = {arXiv: 2411.02549v3},
+	file = {PDF:/home/velocitatem/Zotero/storage/IXTTMD7G/full-text.pdf:application/pdf},
+}
+
+@article{parkes_economic_2015,
+	title = {Economic reasoning and artificial intelligence},
+	volume = {349},
+	issn = {10959203},
+	doi = {10.1126/science.aaa8403},
+	abstract = {The field of artificial intelligence (AI) strives to build rational agents capable of perceiving the world around them and taking actions to advance specified goals. Put another way, AI researchers aim to construct a synthetic homo economicus, the mythical perfectly rational agent of neoclassical economics. We review progress toward creating this new species of machine, machina economicus, and discuss some challenges in designing AIs that can reason effectively in economic contexts. Supposing that AI succeeds in this quest, or at least comes close enough that it is useful to think about AIs in rationalistic terms, we ask how to design the rules of interaction in multi-agent systems that come to represent an economy of AIs. Theories of normative design from economics may prove more relevant for artificial agents than human agents, with AIs that better respect idealized assumptions of rationality than people, interacting through novel rules and incentive systems quite distinct from those tailored for people.},
+	number = {6245},
+	journal = {Science},
+	author = {Parkes, David C. and Wellman, Michael P.},
+	month = jul,
+	year = {2015},
+	pmid = {26185245},
+	note = {Publisher: American Association for the Advancement of Science},
+	pages = {267--272},
+	file = {PDF:/home/velocitatem/Zotero/storage/27KLNFRU/_aiEcon.pdf:application/pdf},
+}
+
+@article{yokoo_effect_2004,
+	title = {The effect of false-name bids in combinatorial auctions: {New} fraud in internet auctions},
+	volume = {46},
+	issn = {08998256},
+	doi = {10.1016/S0899-8256(03)00045-9},
+	abstract = {We examine the effect of false-name bids on combinatorial auction protocols. False-name bids are bids submitted by a single bidder using multiple identifiers such as multiple e-mail addresses. The obtained results are summarized as follows: (1) the Vickrey-Clarke-Groves (VCG) mechanism, which is strategy-proof and Pareto efficient when there exists no false-name bid, is not false-name-proof; (2) there exists no false-name-proof combinatorial auction protocol that satisfies Pareto efficiency; (3) one sufficient condition where the VCG mechanism is false-name-proof is identified, i.e., the concavity of a surplus function over bidders. © 2003 Elsevier Inc. All rights reserved.},
+	number = {1},
+	journal = {Games and Economic Behavior},
+	author = {Yokoo, Makoto and Sakurai, Yuko and Matsubara, Shigeo},
+	year = {2004},
+	note = {Publisher: Academic Press Inc.},
+	keywords = {Auction, Mechanism design, Strategy-proof},
+	pages = {174--188},
+	file = {PDF:/home/velocitatem/Zotero/storage/LUVQV6WT/Yokoo04.pdf:application/pdf},
+}
+
+@inproceedings{feldman_free-riding_2004,
+	title = {Free-riding and whitewashing in peer-to-peer systems},
+	isbn = {1-58113-942-X},
+	doi = {10.1145/1016527.1016539},
+	abstract = {We develop a model to study the phenomenon of free-riding in peer-to-peer (P2P) systems. At the heart of our model is a user of a certain type, an intrinsic and private parameter that reflects the user's willingness to contribute resources to the system. A user decides whether to contribute or free-ride based on how the current contribution cost in the system compares to her type. When the societal generosity (i.e., the average type) is low, intervention is required in order to sustain the system. We present the effect of mechanisms that exclude low type users or, more realistic, penalize free-riders with degraded service. We also consider dynamic scenarios with arrivals and departures of users, and with whitewashers: users who leave the system and rejoin with new identities to avoid reputational penalties. We find that when penalty is imposed on all newcomers in order to avoid whitewashing, system performance degrades significantly only when the turnover rate among users is high.},
+	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2004 {Workshops}},
+	publisher = {Association for Computing Machinery},
+	author = {Feldman, Michal and Papadimitriou, Christos and Chuang, John and Stoica, Ion},
+	year = {2004},
+	keywords = {Cheap pseudonyms, Cooperation, Equilibrium, Exclusion, Free-riding, Identity cost, Incentives, Peer-to-peer, Whitewashing},
+	pages = {228--235},
+	file = {PDF:/home/velocitatem/Zotero/storage/K32WH6SB/1016527.1016539.pdf:application/pdf},
+}
+
+@article{calvano_artificial_2018,
+	title = {Artificial {Intelligence}, {Algorithmic} {Pricing} and {Collusion}},
+	url = {https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3304991},
+	doi = {10.2139/ssrn.3304991},
+	journal = {SSRN Electronic Journal},
+	author = {Calvano, Emilio and Calzolari, Giacomo and Denicolo, Vincenzo and Pastorello, Sergio},
+	year = {2018},
+	file = {PDF:/home/velocitatem/Zotero/storage/WYTSSZBR/ssrn-3304991.pdf:application/pdf},
+}
+
+@techreport{varian_economic_1995,
+	title = {Economic {Mechanism} {Design} for {Computerized} {Agents}},
+	abstract = {The field of economic mechanism design has been an active area of research in economics for at least 20 years. This field uses the tools of economics and game theory to design ``rules of interaction'' for economic transactions that will, in principle, yield some desired outcome. In this paper I provide an overview of this subject for an audience interested in applications to electronic commerce and discuss some special problems that arise in this context.},
+	author = {Varian, Hal R},
+	year = {1995},
+	file = {PDF:/home/velocitatem/Zotero/storage/S8635QX6/varian95a.pdf:application/pdf},
+}
+
+@book{russell_artificial_nodate,
+	title = {Artificial {Intelligence}: {A} {Modern} {Approach}, {Fourth} {Edition}, {Global} {Edition}},
+	isbn = {978-1-292-40117-1},
+	author = {Russell, Stuart and Norvig, Peter},
+	file = {PDF:/home/velocitatem/Zotero/storage/6B8W8S27/efdd4d1d4c2087fe1cbe03d9ced67f34.pdf:application/pdf},
+}
+
+@techreport{wellman_price_2004,
+	title = {Price {Prediction} in a {Trading} {Agent} {Competition} {Yevgeniy} {Vorobeychik}},
+	abstract = {The 2002 Trading Agent Competition (TAC) presented a challenging market game in the domain of travel shopping. One of the pivotal issues in this domain is uncertainty about hotel prices, which have a significant influence on the relative cost of alternative trip schedules. Thus, virtually all participants employ some method for predicting hotel prices. We survey approaches employed in the tournament, finding that agents apply an interesting diversity of techniques, taking into account differing sources of evidence bearing on prices. Based on data provided by entrants on their agents' actual predictions in the TAC-02 finals and semifinals, we analyze the relative efficacy of these approaches. The results show that taking into account game-specific information about flight prices is a major distinguishing factor. Machine learning methods effectively induce the relationship between flight and hotel prices from game data, and a purely analytical approach based on competitive equilibrium analysis achieves equal accuracy with no historical data. Employing a new measure of prediction quality, we relate absolute accuracy to bottom-line performance in the game.},
+	author = {Wellman, Michael P. and Reeves, Daniel M. and Lochner, Kevin M. and Vorobeychik, Yevgeniy},
+	year = {2004},
+	note = {Publication Title: Journal of Artificial Intelligence Research
+Volume: 21},
+	pages = {19--36},
+	file = {PDF:/home/velocitatem/Zotero/storage/N9JNXFJW/live-1333-2265-jair.pdf:application/pdf},
+}
+
+@techreport{shoham_multiagent_nodate,
+	title = {Multiagent {Systems}: {Algorithmic}, {Game}-{Theoretic}, and {Logical} {Foundations}},
+	url = {http://www.masfoundations.org},
+	author = {Shoham, Yoav and Leyton-Brown, Kevin},
+	keywords = {algorithms, auctions, communication, competition, cooperation, distributed problem solving, game theory, learning, logic, mechanism design, social choice},
+	file = {PDF:/home/velocitatem/Zotero/storage/QZVYS7V9/shoham09a.pdf:application/pdf},
+}
+
+@article{xia_evaluation-driven_2025,
+	title = {Evaluation-{Driven} {Development} and {Operations} of {LLM} {Agents}: {A} {Process} {Model} and {Reference} {Architecture}},
+	url = {http://arxiv.org/abs/2411.13768},
+	abstract = {Large Language Models (LLMs) have enabled the emergence of LLM agents, systems capable of pursuing under-specified goals and adapting after deployment. Evaluating such agents is challenging because their behavior is open ended, probabilistic, and shaped by system-level interactions over time. Traditional evaluation methods, built around fixed benchmarks and static test suites, fail to capture emergent behaviors or support continuous adaptation across the lifecycle. To ground a more systematic approach, we conduct a multivocal literature review (MLR) synthesizing academic and industrial evaluation practices. The findings directly inform two empirically derived artifacts: a process model and a reference architecture that embed evaluation as a continuous, governing function rather than a terminal checkpoint. Together they constitute the evaluation-driven development and operations (EDDOps) approach, which unifies offline (development-time) and online (runtime) evaluation within a closed feedback loop. By making evaluation evidence drive both runtime adaptation and governed redevelopment, EDDOps supports safer, more traceable evolution of LLM agents aligned with changing objectives, user needs, and governance constraints.},
+	author = {Xia, Boming and Lu, Qinghua and Zhu, Liming and Xing, Zhenchang and Zhao, Dehai and Zhang, Hao},
+	month = nov,
+	year = {2025},
+	note = {arXiv: 2411.13768},
+	file = {PDF:/home/velocitatem/Zotero/storage/H8IS64AW/2411.13768v2.pdf:application/pdf},
+}
+
+@techreport{xie_osworld_nodate,
+	title = {{OSWORLD}: {Benchmarking} {Multimodal} {Agents} for {Open}-{Ended} {Tasks} in {Real} {Computer} {Environments}},
+	url = {https://os-world.github.io},
+	abstract = {Autonomous agents that accomplish complex computer tasks with minimal human interventions have the potential to transform human-computer interaction, significantly enhancing accessibility and productivity. However, existing benchmarks either lack an interactive environment or are limited to environments specific to certain applications or domains, failing to reflect the diverse and complex nature of real-world computer use, thereby limiting the scope of tasks and agent scalability. To address this issue, we introduce OSWORLD, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems such as Ubuntu, Windows, and macOS. OSWORLD can serve as a unified, integrated computer environment for assessing open-ended computer tasks that involve arbitrary applications. Building upon OSWORLD, we create a benchmark of 369 computer tasks involving real web and desktop apps in open domains, OS file I/O, and workflows spanning multiple applications. Each task example is derived from real-world computer use cases and includes a detailed initial state setup configuration and a custom execution-based evaluation script for reliable, reproducible evaluation. Extensive evaluation of state-of-the-art LLM/VLM-based agents on OSWORLD reveals significant deficiencies in their ability to serve as computer assistants. While humans can accomplish over 72.36\% of the tasks, the best model achieves only 12.24\% success, primarily struggling with GUI grounding and operational knowledge. Comprehensive analysis using OSWORLD provides valuable insights for developing multimodal generalist agents that were not possible with previous benchmarks. Our code, environment, baseline models, and data are publicly available at https://os-world.github.io.},
+	author = {Xie, Tianbao and Zhang, Danyang and Chen, Jixuan and Li, Xiaochuan and Zhao, Siheng and Cao, Ruisheng and Jing Hua, Toh and Cheng, Zhoujun and Shin, Dongchan and Lei, Fangyu and Liu, Yitao and Xu, Yiheng and Zhou, Shuyan and Savarese, Silvio and Xiong, Caiming and Zhong, Victor and Yu, Tao},
+	note = {arXiv: 2404.07972v2},
+	file = {PDF:/home/velocitatem/Zotero/storage/LLRKXIC7/full-text.pdf:application/pdf},
+}
+
+@techreport{imperva_rapid_2025,
+	title = {The {Rapid} {Rise} of {Bots} and the {Unseen} {Risk} for {Business} \#{2025BADBOTREPORT}},
+	author = {{Imperva}},
+	year = {2025},
+	file = {PDF:/home/velocitatem/Zotero/storage/AWR9IQRD/2025-Bad-Bot-Report.pdf:application/pdf},
+}
+
+@article{perez-ricardo_exploring_2025,
+	title = {Exploring booking intentions through price elasticity of demand in tourism accommodations using large-scale data analytics},
+	volume = {31},
+	issn = {24448834},
+	doi = {10.1016/j.iedeen.2025.100271},
+	abstract = {The study aims to explore tourists' booking intentions by analyzing the price elasticity of demand in tourist accommodations. This analysis should reveal how changes in price affect booking behavior across different customer segments, using online booking records. A dataset was compiled from 106 hotels in Malaga, Spain, comprising 27,910 online bookings sourced exclusively from hotel websites. To understand the price elasticity of demand, a simple log-log regression was applied, segmenting the data based on key revenue-related variables. Subsequently, a cluster segmentation was performed using the Elbow method and K-means algorithm to identify distinct market segments. The findings highlighted that Family Travelers and Short Stay Travelers segments exhibited elastic demand, indicating higher sensitivity to price fluctuations. In contrast, Early Bookers and Mid-Season Long Stayers demonstrated inelastic demand, with lower responsiveness to changes in tourist accommodation prices. The number of variables analyzed in this study, along with the cluster analysis, represent a novelty and contribute to the existing literature on market segmentation and price elasticity of demand. This integration enriches both fields of research, offering mutual benefits and deeper insights that enhance the understanding of booking intention and pricing strategies.},
+	number = {1},
+	urldate = {2025-11-28},
+	journal = {European Research on Management and Business Economics},
+	author = {Pérez-Ricardo, Elizabeth del Carmen and García-Mestanza, Josefa},
+	month = jan,
+	year = {2025},
+	note = {Publisher: European Academy of Management and Business Economics},
+	keywords = {Booking intention, Price elasticity, Tourist segmentation},
+	file = {PDF:/home/velocitatem/Zotero/storage/QNXZJLRM/S2444883425000038.pdf:application/pdf},
+}
+
+@misc{ghaffary_amazon_nodate,
+	title = {Amazon {Sues} to {Stop} {Perplexity} {From} {Using} {AI} {Tool} to {Buy} {Stuff}},
+	url = {https://www.bloomberg.com/news/articles/2025-11-04/amazon-demands-perplexity-stop-ai-agent-from-making-purchases},
+	author = {Ghaffary, Shirin and Day, Matt},
+	file = {PDF:/home/velocitatem/Zotero/storage/IQL6FPWE/Amazon Sues to Stop Perplexity From Using AI Tool to Buy Stuff - Bloomberg.pdf:application/pdf},
+}
+
+@techreport{besbes_dynamic_nodate,
+	title = {Dynamic {Pricing} {Without} {Knowing} the {Demand} {Function}: {Risk} {Bounds} and {Near}-{Optimal} {Algorithms}},
+	abstract = {We consider a single product revenue management problem where, given an initial inventory, the objective is to dynamically adjust prices over a finite sales horizon to maximize expected revenues. Realized demand is observed over time, but the underlying functional relationship between price and mean demand rate that governs these observations (otherwise known as the demand function or demand curve), is not known. We consider two instances of this problem: i.) a setting where the demand function is assumed to belong to a known parametric family with unknown parameter values; and ii.) a setting where the demand function is assumed to belong to a broad class of functions that need not admit any parametric representation. In each case we develop policies that learn the demand function "on the fly," and optimize prices based on that. The performance of these algorithms is measured in terms of the regret: the revenue loss relative to the maximal revenues that can be extracted when the demand function is known prior to the start of the selling season. We derive lower bounds on the regret that hold for any admissible pricing policy, and then show that our proposed algorithms achieve a regret that is "close" to this lower bound. The magnitude of the regret can be interpreted as the economic value of prior knowledge on the demand function; manifested as the revenue loss due to model uncertainty.},
+	author = {Besbes, Omar and Zeevi, Assaf},
+	note = {Publication Title: Operations Research},
+	keywords = {learning, asymptotic analysis, estimation, exploration-exploitation, pricing, Revenue management, value of information},
+	file = {PDF:/home/velocitatem/Zotero/storage/SBAIB4V2/Dp_wo_demand_risk_ob_az_posted.pdf:application/pdf},
+}
+
+@techreport{markntel_advisors_global_2025,
+	address = {Noida, Uttar Pradesh, India},
+	title = {Global {AI} {Agent} {Market} {Research} {Report}: {Forecast} (2026–2032)},
+	url = {https://www.marknteladvisors.com/research-library/ai-agent-market.html},
+	urldate = {2025-12-12},
+	institution = {MarkNtel Advisors},
+	author = {{MarkNtel Advisors}},
+	year = {2025},
+}
+
+@article{amjad_censored_2017,
+	title = {Censored {Demand} {Estimation} in {Retail}},
+	volume = {1},
+	url = {https://par.nsf.gov/servlets/purl/10066022},
+	doi = {10.1145/3154489},
+	abstract = {In this paper, the question of interest is estimating true demand of a product at a given store location and time period in the retail environment based on a single noisy and potentially censored observation. To address this question, we introduce a non-parametric framework to make inference from multiple time series. Somewhat surprisingly, we establish that the algorithm introduced for the purpose of "matrix completion" can be used to solve the relevant inference problem. Specifically, using the Universal Singular Value Thresholding (USVT) algorithm [7], we show that our estimator is consistent: the average mean squared error of the estimated average demand with respect to the true average demand goes to 0 as the number of store locations and time intervals increase to $\infty$. We establish naturally appealing properties of the resulting estimator both analytically as well as through a sequence of instructive simulations. Using a real dataset in retail (Walmart), we argue for the practical relevance of our approach.},
+	number = {2},
+	urldate = {2025-11-12},
+	journal = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
+	author = {Amjad, Muhammad J. and Shah, Devavrat},
+	month = dec,
+	year = {2017},
+	note = {Publisher: Association for Computing Machinery (ACM)},
+	pages = {1--28},
+	file = {PDF:/home/velocitatem/Zotero/storage/5ZYADDT4/10066022.pdf:application/pdf},
+}
+
+@misc{ganie_uncertainty_2025,
+	title = {Uncertainty in {Authorship}: {Why} {Perfect} {AI} {Detection} {Is} {Mathematically} {Impossible}},
+	shorttitle = {Uncertainty in {Authorship}},
+	url = {http://arxiv.org/abs/2509.11915},
+	doi = {10.48550/arXiv.2509.11915},
+	abstract = {As large language models (LLMs) become more advanced, it is increasingly difficult to distinguish between human-written and AI-generated text. This paper draws a conceptual parallel between quantum uncertainty and the limits of authorship detection in natural language. We argue that there is a fundamental trade-off: the more confidently one tries to identify whether a text was written by a human or an AI, the more one risks disrupting the text's natural flow and authenticity. This mirrors the tension between precision and disturbance found in quantum systems. We explore how current detection methods--such as stylometry, watermarking, and neural classifiers--face inherent limitations. Enhancing detection accuracy often leads to changes in the AI's output, making other features less reliable. In effect, the very act of trying to detect AI authorship introduces uncertainty elsewhere in the text. Our analysis shows that when AI-generated text closely mimics human writing, perfect detection becomes not just technologically difficult but theoretically impossible. We address counterarguments and discuss the broader implications for authorship, ethics, and policy. Ultimately, we suggest that the challenge of AI-text detection is not just a matter of better tools--it reflects a deeper, unavoidable tension in the nature of language itself.},
+	language = {en},
+	urldate = {2026-01-05},
+	publisher = {arXiv},
+	author = {Ganie, Aadil Gani},
+	month = sep,
+	year = {2025},
+	note = {arXiv:2509.11915 [cs]},
+	keywords = {Computer Science - Computation and Language},
+	file = {PDF:/home/velocitatem/Zotero/storage/3Z2XK4QC/Ganie - 2025 - Uncertainty in Authorship Why Perfect AI Detection Is Mathematically Impossible.pdf:application/pdf},
+}
diff --git a/paper/src/chapters/01-intro.tex b/paper/src/chapters/01-intro.tex
index 23fa1a6..b40e3fc 100644
--- a/paper/src/chapters/01-intro.tex
+++ b/paper/src/chapters/01-intro.tex
@@ -8,9 +8,50 @@
 
 \section{Introduction}
 
-Research Objectives and Contribution: What are we making, why and who should care?
+In this paper we present an exploration of, and defense against, the presence of new commercial entities in digitally powered platforms, preserving market equilibrium in the age of AI. This research establishes the following contributions: definition and formalization of non-human transactors in e-commerce platforms, development of a testing-ground for capturing the behavioral essence of these transactors across a large variety of digital systems, construction of a discriminative model (to prove separability) as a strong learner for downstream mitigation of contamination by non-human entities, translation of such learned separability into existing dynamic pricing machine learning loops, and finally establishment of a high-level KPI-affecting causal effect and cost-saving framework for the future of internet commerce in the presence of such non-human learners.
+
+This research effort touches a large variety of domains, spanning behavioral economics for understanding the rationality of behavior as theorized by the concept of homo economicus, agent-based modeling to translate our learned separability into disjoint dynamic pricing systems, reinforcement learning which serves as the SOTA for price-learners, and dynamic pricing and market equilibrium theory to understand the risks of possible supra-competitive pricing phenomena in cases of adversarial pricing systems driving the market out of equilibrium.
 
 \subsection{Motivation and Market Context}
-Current market dynamics and trends of dynamic pricing and AI agents. Future projections of AI agents. Key stakeholders that are discussing this and reporting on it (Thales). Who is most affected
+
+The current innovation boom in generative artificial intelligence and its applications to knowledge-based work tasks has brought many competing technologies for browser-use automation, with benchmarks and evaluations \cite{xia_evaluation-driven_2025} motivating the development of capabilities focused on commercial research, understanding, and transaction execution \cite{xie_osworld_nodate}. The ``AI Agent'' market is forecasted to grow from around USD 5-8 billion in 2025 to USD 42-52 billion by 2030. This surge reflects adoption in e-commerce, customer service, and enterprise automation, where agents handle interactions previously done by humans, raising the question of how these systems should be designed for future robustness as well as how to maintain a competitive edge in the analytical components of e-commerce platforms \cite{markntel_advisors_global_2025}.
+
+The key stakeholders affected by the threat of increasing agent-driven traffic include online businesses and platform operators (especially in bot-heavy sectors like retail, travel, and financial services), their security, fraud, and engineering teams, end users whose accounts and data are exposed and whose experience degrades, regulators and legal stakeholders responding to breaches and fraud, and the attackers or bot operators driving the automation \cite{imperva_rapid_2025}.
+
+The industry has already seen legal action in cases like Amazon against Perplexity \cite{ghaffary_amazon_nodate}, stemming from the difficulty of identifying traffic from hybrid systems like the Comet browser. This paper explores such systems to better understand what the interaction data looks like and what it means for dynamic pricing and recommendation systems downstream. This observed impact indicates a need for prevention of secondary negative effects on the ``legacy'' systems which power modern revenue sources for many companies. Dynamic pricing algorithms rely on directly translating demand features $q$ to new price assignments $\hat{p}$ across a catalogue of products of size $N$. This opens opportunities to design a \textit{tabula rasa} of digital market mechanisms that will shape the future of commerce in the age of artificial intelligence.
+
 \subsection{Solution Space Overview}
-Different approaches and perspectives, here also add a preview of what will be developed and explored in the lit review.
+Dynamic pricing systems, as presented in \cite{mueller_low-rank_2019}, often deal with sparse low-rank data of demand signals which, combined with contamination from agents, creates complex interactions that impact pricing. To further complicate the problem, certain commercial settings such as the one presented in \cite{amjad_censored_2017} must address the true demand of products under censored observations. This provides a formulation for handling demand in our case with multiple kinds of commercial mediators: $\hat{q} \gets q_A + q_H$, where $q_A$ represents the distribution of demand generated by agentic mediators and $q_H$ represents that of true human demand; these are two distinct populations with divergent objective functions.
+
+We formally define interaction data as coming from some actor which can either be an agent ($A$) or human ($H$). For purposes of this research, an agent is an algorithmic loop with the ability to access a web platform and perform actions such as clicks, scrolls, and input field fills. The loop terminates when the internal large language model judges the provided task definition as complete. A detailed breakdown can be found in \cref{algagent-loop}.
+
+
+\begin{algorithm}[t]
+\DontPrintSemicolon
+
+\SetKwInOut{Input}{Input}
+\SetKwInOut{Output}{Output}
+
+\Input{Goal $G$, Platform URL $u$, LLM $\mathcal{M}$}
+\Output{Task completion result $r$}
+
+Initialize browser instance $\mathcal{B}$ with connection to $u$\;
+Construct prompt $\pi \gets \textsc{BuildPrompt}(G, u)$\;
+$\text{done} \gets \text{False}$\;
+
+\While{$\neg \text{done}$}{
+    Observe current page state $s_t$ from $\mathcal{B}$\;
+    Query $\mathcal{M}$ with $(\pi, s_t)$ to determine next action $a_t \in \{\text{click}, \text{scroll}, \text{fill}, \text{navigate}\}$\;
+    Execute $a_t$ on $\mathcal{B}$ to transition to state $s_{t+1}$\;
+    $\text{done} \gets \mathcal{M}.\textsc{JudgeCompletion}(G, s_{t+1})$\;
+}
+
+Extract final result $r$ from terminal state\;
+\Return{$r$}\;
+
+\caption{AI Agent's Interaction Loop}
+\label{algagent-loop}
+\end{algorithm}
+
+
+The previously described goal of separability allows us to formulate a task which entails taking raw interaction data for either actor and creating a composite demand estimate $\hat{q}$. We propose a robust optimization objective defined in our methodology, transforming the pricing problem into a form of Distributionally Robust Optimization \cite{kuhn_distributionally_2025} where the learner must guard against adversarial contamination in observed demand distributions. In this setting we must learn to make decisions that perform well not under a single estimated probability distribution but under an ambiguity set of distributions about which we have limited information. In our case, as stated, the demand is a mixture of distributions with a parameter that is unknown and non-stationary.
diff --git a/paper/src/chapters/02-literature-review.tex b/paper/src/chapters/02-literature-review.tex
index 6395206..566d03f 100644
--- a/paper/src/chapters/02-literature-review.tex
+++ b/paper/src/chapters/02-literature-review.tex
@@ -1,15 +1,44 @@
 \section{Literature Review}
 
-\subsection{Foundational Concepts}
+To better understand all facets of the work, we must start by exploring the nature of agents, agentic computer use, and web automation, complementing that with economic reasoning and strategic interaction. The final surface to cover leads us to data-driven dynamic pricing under uncertainty. The key technical risk is not ``agents buying things'' per se, but agents shaping the behavioral and demand signals that downstream pricing systems consume and depend on. The introduction of these mediating actor entities into economic systems further creates a threat of false-name bidding \cite{yokoo_effect_2004}, which prior research has explored in a trading context. Other research on pseudonyms in dynamic systems demonstrates whitewashing in AI agents, which can ignore defensive mechanisms by re-entry with different identities \cite{feldman_free-riding_2004}. Dynamic pricing assumes demand proxies are behaviorally meaningful, while bot detection aims at security and access control. The missing bridge is a principled framework for separating non-human reconnaissance from genuine human demand expression and integrating that separation into pricing heuristics without degrading legitimate user experience (in our research tracked by the user-experience index). This gap is what our contribution aims to address, particularly for the aforementioned stakeholder groups.
+
+\subsection{Agent Taxonomy and Definitions}
+
+An agent in the context of artificial intelligence is generally defined as anything that can reason and act upon observations of its environment (collected through some sensory inputs) and carry out actions through effectors. Moreover, a rational agent is an entity that is capable of perceiving the world around it and taking actions to advance specified goals. This definition by \cite{russell_artificial_nodate} is further developed in an economic context by \cite{parkes_economic_2015}, suggesting AI research attempts to construct a synthetic \textit{homo economicus}, which may also be termed \textit{machina economicus}.
+A specific class or taxon of this \textit{machina economicus}, the Large Language Model (LLM) agent, is defined as an autonomous system capable of achieving goals and adapting post-training, often without needing explicit code or fundamental model changes \cite{xia_evaluation-driven_2025}.
+
+We must, however, acknowledge that the current SOTA, as presented by the OSWORLD simulations in \cite{xie_osworld_nodate}, achieves a top-performing score of only 12.24\% success on multi-modal tasks across desktop and web interaction modes, whereas humans achieve a 72.36\% success rate. This weakness matters for this research because it clarifies the near-term threat model: practical exploitation does not require a fully competent ``computer assistant'', only enough automation to perform high-volume reconnaissance actions (search/filter/open product pages, probe availability/price boundaries) that can contaminate behavioral signals. With the expected growth of these capabilities, this threat only becomes more perilous to revenue management systems.
+
+We model an agent session as producing some events with lower in-session conversion levels relative to humans; we state this in our assumption that $P(\text{purchase} \vert A) \ll P(\text{purchase} \vert H)$, but with a potentially higher volatility in $\hat{q}$, which we observe through the look-to-book metrics in our simulation.
+
+\subsection{Economic Agents: From Homo Economicus to Machina Economicus}
+
+Existing behavioral economic models tend to be criticized for the assumption of rational behavior, as embodied in the term \textit{homo economicus}. The definition of a \textit{machina economicus} by \cite{parkes_economic_2015} is quite appropriate for our case, particularly because these assumptions of rationality have been argued to be a very adequate reference for AI research by \cite{varian_economic_1995}. For modeling this behavior, the trajectories of these agents can be formally defined to be partially observable Markov decision processes \cite{xie_osworld_nodate}. Agents are, however, not to be confused with web-bots, which have previously been known as automated software applications or scrapers set up to carry out specific tasks on the internet without a higher level of internal judgement \cite{imperva_rapid_2025}. In our research, we refer to this actor simply as an Agent belonging to the distribution $A$.
+
+This economic framing also helps separate two related but distinct phenomena: agents as buyers (changing market demand composition), and agents as information gatherers (changing the observed interactions used by pricing/recommendation systems). The thesis focuses on the second, where information acquisition strategically precedes purchase execution. We do not, however, dismiss the expectation that existing economic systems serving humans will be populated by AIs across multiple channels and with various possibly misaligned goals, as stated by \cite{parkes_economic_2015}.
 
-What is the taxonomy and definition of an agent and an actor in this case, a bit more about interaction models in sessions and about dynamic pricing algorithms.
 
 \subsection{Problem Evidence and Market Impact}
-Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
 
-\subsection{Theoretical Foundations: Economic Prallels}
+The statistical issue of contamination in dynamic pricing systems that observe demand features as a means to update prices has been documented in various previous contexts. The airline industry (which has accounted for 24\% of observed disruptions) has seen malicious activity with a measurable impact on skewing key performance indicators by behavior visible in the look-to-book metrics. Excessive reconnaissance traffic inflates search volume without corresponding completed bookings, thereby skewing demand forecasts and disrupting dynamic pricing models. Demand proxies have also been observed to cause significant threat to inventory management by creating artificial scarcity that distorts the demand-supply relationships in the enterprise model. Censored demand as shown in \cite{amjad_censored_2017} can also be observed in low-bias demand under-estimation caused by a distortion effect coming from non-human traffic data \cite{imperva_rapid_2025}.
+
+When dynamic pricing algorithms operate on highly contaminated or noisy data, the risk of creating inaccurate price inferences grows significantly. The emergent mitigation driven by uninformed reward and regret signals might lead to price suppression for sales continuity, which harms margins and results in revenue loss. Systems that poorly fit undesired behavior might result in price gouging, which calls for strong guardrails while preserving targeted business strategy \cite{mullapudi_reinforcement_nodate}.
+
+
+%Documented instances of agent-driven market disruptions - Quantitative evidence of pricing manipulation - Case studies from affected industries
+
+\subsection{Theoretical Foundations: Economic Parallels}
+
+
+
+Early hints of price exploration appear in the standard English auction discussed in \cite{varian_economic_1995}, where prices are explored in a sequential manner, which leads to a marginally different cost to the bidder than the reservation price of the seller. This is a setting in which there is no cost incurred by the buyer for their actions or for exploring prices in the market. They propose that any agent responsible for the pricing of a good must be immune to dynamic strategies which might extract private information from a market. A key take-away, which relates to the Vickrey auction mechanism (also called a \textit{direct mechanism}), suggests that not only would defenses against such exploitation be necessary, but so would the construction of a mechanism in which revelation of the true willingness to pay is the dominant strategy for commerce.
+
+As in classical revenue-maximizing auctions \cite{roughgarden_cs364a_2013}, we assume that the human actor in our system has a private valuation $v$ which we formally draw from later defined distributions. The important note here is that the agent proxy does not have a mechanism to convey this private information into the demand data which directly impacts the pricing systems.
+
+% Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
+
+% Link Coasean Singularity and other economic market theory and highlight specific information of supra competitive pricing.
 
-Economic foundations: relating the problem to options pricing theory. Cost of Information (COI) concept and its relevance
 
 \subsection{Landscape of Existing Work}
 
diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex
index dd21186..7b4d3f4 100644
--- a/paper/src/chapters/03-methodology.tex
+++ b/paper/src/chapters/03-methodology.tex
@@ -1,68 +1,251 @@
 \section{Methodology}
 
+This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven separability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
 
 \subsection{Problem Formalization}
 
-Mathematical formalization of agent-induced pricing distortions. Formal definition of potential loss mechanisms $\alpha D$
+We define a commercial environment where the platform interacts with a stream of sessions. Let $\mathcal{S}$ denote the set of all sessions. Each session $s \in \mathcal{S}$ is generated by an actor belonging to a latent class $Y_s \in \{H, A\}$, where $H$ denotes Human and $A$ denotes Agent.
 
-We consider a business across time during which we have an evolving vector $p_t \in \Re^N$ where $N$ is the number of products in our catalogue. our price vector is directly dependent on a demand function $q_t$ which we define as a linear method of a price elasticity matrix $B_t$. This is the same setup that Microsoft created in their research.
+Each session produces a trajectory of observable events $\tau_s = (e_{s,1}, \ldots, e_{s,L_s})$. An event $e_{s,k}$ is a tuple defined as:
+\begin{equation}
+e_{s,k} = (a_{s,k}, i_{s,k}, t_{s,k})
+\end{equation}
+where:
+\begin{itemize}
+    \item $a_{s,k} \in \mathcal{A}$ is the action taken (e.g., \texttt{view\_item}, \texttt{add\_to\_cart}).
+    \item $i_{s,k} \in \{1, \ldots, N\}$ is the target item index.
+    \item $t_{s,k} \in \mathbb{R}_+$ is the continuous timestamp.
+\end{itemize}
 
-We gether interaction data from users interacting with a sample platform simulating a hotel/airline which generates interaction distributions $I_t = \{(p_t, q_t^\text{obs}, \pi_t)\}_{t=1}^T$
+The platform does not directly observe the true underlying demand function $d(p)$. Instead, it observes a behavioral proxy $\hat{q}_t$, which is a composite signal derived from the mixture of actor types. We define the demand proxy for product $i$ at epoch $t$ as a weighted aggregation of events:
+\begin{equation}
+\hat{q}_{t,i} = \sum_{s \in \mathcal{S}_t} \sum_{k=1}^{L_s} \omega(a_{s,k}) \cdot \mathbb{1}[i_{s,k} = i]
+\end{equation}
+where $\omega: \mathcal{A} \to \mathbb{R}_+$ assigns weights to actions based on their signal strength regarding willingness to pay.
+
+\subsubsection{Actor Types and Demand Curves}
+We formalize the heterogeneity of actors by introducing a type space $\Theta$. An actor of class $Y_s$ is further parameterized by a type $\theta \sim \mathcal{D}_{Y}$. This type determines the actor's demand response function $d(p; \theta)$, sampled from a distribution of possible demand curves. The total observed demand is a stochastic process governed by the mixture:
+\begin{equation}
+Q(p) = (1-\alpha) \cdot \mathbb{E}_{\theta \sim \mathcal{D}_H}[d(p; \theta)] + \alpha \cdot \mathbb{E}_{\theta \sim \mathcal{D}_A}[d(p; \theta)] + \epsilon_t
+\end{equation}
+where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of agents) and $\epsilon_t$ is non-stationary market noise.
 
 
-\subsection{Cost of Information Framework}
 
-Mathematical demonstration and validation of the COI and citation backed evidence, and framework overview + show harm to user via other cost distortions. Maybe split into 3.2.1 (COI Theory) and 3.2.2 (Framework Design)
+\subsection{Cost of Information (COI) Framework}
+
+The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost). This also speaks to the financial urgency as a consequence of information asymmetry between the platform and the actors.
+
+\begin{definition}[Cost of Information]
+Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The COI is defined as:
+\begin{align}
+\text{COI} &= \mathbb{E}[P] - \underline{p} \\
+            &= \int_{\underline{p}}^{\bar{p}} (1 - F_\pi(p)) \, dp
+\end{align}
+where $F_\pi(p)$ is the cumulative distribution function of prices generated by $\pi$ under standard operating conditions.
+\end{definition}
 
-\subsection{System Architecture}
 \begin{figure}[ht]
-\centering
-\begin{tikzpicture}[
-  node distance=1.5cm and 2.5cm,
-  box/.style={rectangle, draw, thick, minimum height=1cm, minimum width=3cm, align=center, fill=blue!10},
-  kafka/.style={rectangle, draw=orange, thick, minimum height=1cm, minimum width=3cm, align=center, fill=orange!15},
-  arrow/.style={thick,->,>=Stealth}
-]
+    \centering
+    \begin{tikzpicture}[scale=1.2]
+        % Define the Gaussian function: centered at 2
+        \def\bellcurve(#1){1.5 * exp(-0.5*((#1-2)/0.6)^2)}
 
-% Nodes
-\node[box] (webapp) {Web Application \\ (Producer \& Consumer)};
-\node[kafka, below=of webapp] (kafka) {Apache Kafka \\ Cluster};
-\node[box, below=of kafka] (backend) {Backend Services / Microservices \\ (Producers and Consumers)};
+        % Draw the main axis
+        \draw[->, thick] (0, 0) -- (4.5, 0) node[right] {$p$};
+        \draw[->, thick] (0, 0) -- (0, 2) node[above] {Density};
 
-% Connections
-\draw[arrow] (webapp) to[out=210,in=150] node[above]{Publish} (kafka);
-\draw[arrow] (kafka) to[out=50,in=330] node[below]{Consume} (webapp);
-\draw[arrow] (backend) -- node[above]{Publish/Consume} (kafka);
+        \draw[thick, smooth, samples=100] plot[domain=0:4] (\x, {\bellcurve(\x)});
+        \node at (3.2, 1.2) {$f_\pi(p)$};
 
-% Optional: Kafka internal components
-%\node[below=0.7cm of kafka, align=center] (topics) {Topics \\ Partitions};
+        % Define p_min and E[p]
+        \def\pmin{0.8}
+        \def\mean{2}
 
-% Optional background
-\begin{scope}[on background layer]
-  \node[draw, rounded corners, fill=orange!5, fit=(kafka), inner sep=0.3cm] {};
-\end{scope}
-\end{tikzpicture}
-\caption{Technical Diagram}
+        % Vertical lines
+        \draw[dashed] (\pmin, 0) -- (\pmin, 2.0);
+        \draw[dashed] (\mean, 0) -- (\mean, 2.0);
+
+        % Labels on axis
+        \node[below] at (\pmin, 0) {$\underline{p}$};
+        \node[below] at (\mean, 0) {$\mathbb{E}[p]$};
+
+        \draw[<->, thick, red] (\pmin, 2.0) -- (\mean, 2.0) node[midway, above] {COI};
+
+    \end{tikzpicture}
+    \caption{Illustration of the Cost of Information (COI). The COI is defined as the difference between the expected price $\mathbb{E}[p]$ realized by the policy and the minimum viable price $\underline{p}$.}
+    \label{fig:coi_illustration}
 \end{figure}
 
-High level overview of how it works
+We now formally demonstrate that standard dynamic pricing mechanisms are not incentive-compatible with high-frequency agentic traffic. As the number of independent competitive agents $N$ querying the system grows, the platform's ability to sustain a COI vanishes.
+
+\begin{theorem}[COI Erosion in the Limit]
+Let $N$ be the number of independent, utility-maximizing agents querying the platform. Let $p_{(1)}$ be the first order statistic (minimum) of the prices offered to these agents. As $N \to \infty$, the Cost of Information converges to 0.
+\end{theorem}
+
+\begin{proof}
+Let $p_1, \ldots, p_N$ be independent and identically distributed (i.i.d.) price samples drawn from the policy's distribution $F(p)$ with support $[\underline{p}, \bar{p}]$. The realizable price for an optimal searching agent is the first order statistic $p_{(1)} = \min(p_1, \ldots, p_N)$.
+
+The survival function (or reliability function) of the minimum price is given by:
+\begin{equation}
+S_{p_{(1)}}(t) = P(p_{(1)} > t) = [1 - F(t)]^N
+\end{equation}
+
+To determine the expected value $\mathbb{E}[p_{(1)}]$, we recall the property that for any continuous random variable $X$ with support $[A, B]$, the expectation can be expressed as the lower bound plus the integral of the survival function:
+\begin{equation}
+\mathbb{E}[X] = A + \int_{A}^{B} P(X > t) \, dt
+\end{equation}
+
+Applying this to our pricing statistic where the lower bound is $\underline{p}$:
+\begin{align}
+\mathbb{E}[p_{(1)}] &= \underline{p} + \int_{\underline{p}}^{\bar{p}} P(p_{(1)} > t) \, dt \\
+&= \underline{p} + \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt
+\end{align}
+
+Since $\underline{p}$ is the lower endpoint of the support of $F$, for any $t > \underline{p}$ we have the strict inequality $F(t) > 0$, implying $0 \le 1 - F(t) < 1$. Consequently, as $N \to \infty$, the term $[1 - F(t)]^N$ converges to 0 pointwise for all $t > \underline{p}$.
+
+Applying the Lebesgue Dominated Convergence Theorem (noting that the integrand is bounded by 1 on the finite interval $[\underline{p}, \bar{p}]$):
+\begin{equation}
+\lim_{N \to \infty} \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt = \int_{\underline{p}}^{\bar{p}} 0 \, dt = 0
+\end{equation}
+
+Substituting this back into the expression for COI:
+\begin{align}
+\lim_{N \to \infty} \text{COI} &= \lim_{N \to \infty} (\mathbb{E}[p_{(1)}] - \underline{p}) \\
+&= \lim_{N \to \infty} \left( (\underline{p} + 0) - \underline{p} \right) \\
+&= 0
+\end{align}
+\end{proof}
+
+
+This result proves that standard pricing policies $\pi$ fail to extract surplus in the presence of large-scale agentic search, necessitating a robust counter-mechanism.
+
+% The DRO objective creates a lower bound on COI extraction, effectively guaranteeing a minimum margin even in the presence of adversarial agents. we need to prove this and demonstrate that in a theorem.
+
+
+%Mathematical demonstration and validation of the COI and citation backed evidence, and framework overview + show harm to user via other cost distortions. Maybe split into 3.2.1 (COI Theory) and 3.2.2 (Framework Design)
+
+\subsection{System Architecture: Hybrid Kappa-Lambda Architecture}
+
+In order for our research to have grounding in interactions we built a robust e-commerce web-platform. We initially conducted a survey of the leading platforms of airlines and hotel booking sites to identify the specific interface patterns that effectively manage complex travel data. Our analysis revealed a clear industry standard: while both sectors rely on tabbed service selection and left-sidebar filtering to streamline navigation, they diverge in result presentation: airlines utilize visual date-price bars and multi-step wizards to optimize for logistical transparency, whereas hotel platforms leverage image-led cards and scarcity triggers to drive emotional engagement and urgency. Our web framework defines a highly agnostic boilerplate which can be seeded with any data-modality with an easy-to-tailor pattern, which we leverage to define a \texttt{hotel} and \texttt{airline} mode. Both modes are then individually deployed via an environment level argument which adjusts the proxy routing with a custom middleware inside next.js to render only the desired mode. The purpose of this was to create a baseline adaptable to any use-case or desired commercial application.
+
+
+The architecture of this platform begins with the deployed web-apps posting interaction data to our backend which processes them and stores each ingested interaction into a kafka cluster. This serves as our data reservoir tracking and associating each interaction with its session and importantly with which experiment it belongs to. Not only do we track the behavioral interactions, but our pricing provider micro-service, once called by the frontend reports the observed/queried price-product into kafka. This kafka cluster is subscribed to by our pipeline which is configured on a schedule in Airflow, with the possibility of manual trigger. The final stage of the pricing pipeline, submits computed dynamic pricing results into a redis database for quick updates which is then read by the pricing provider and displayed on the webapp. This is a very generic end-to-end mechanism which is applicable to a variety of different e-commerce tasks. We intentionally put emphasis on the development of this infrastructure to establish a reproducible framework for interaction and to minimize any noise.
+
+
+\subsubsection{DevOps Principles}
+
+\subsubsection{Online Dynamic Pricing}
+
+The dynamic pricing done is handled by a pipeline which computes a demand estimate on a per-product basis of a specific window of the data, defined by the period $T$ which by default is 5 minutes. This dynamic pricing pipeline computes a demand estimate vector $\hat{q} \in \mathbb{R}^N$ by a weighted sum of interactions for each product, it additionally computes a price elasticity vector $\hat{\epsilon}$ in the same dimensions as our demand. The final features matrix is of the size $N \times 2$ which we translate to a new price vector $\hat{p} \in \mathbb{R}^N$. The transformation that governs this dynamic pricing is a very simple surge-based pricing (a special case of our later defined policy $\pi$):
+
+\begin{equation}
+\hat{p}_i = \begin{cases}
+p_{0,i} \cdot \lambda_{\text{surge}} & \text{if } \hat{q}_i \geq \theta_{\text{high}} \\
+p_{0,i} \cdot \lambda_{\text{disc}} & \text{if } \hat{q}_i \leq \theta_{\text{low}} \\
+p_{0,i} & \text{otherwise}
+\end{cases}
+\quad \forall i \in \{1, \ldots, N\}
+\end{equation}
+
+where $p_0 \in \mathbb{R}^N$ is the base price vector (which is seeded into our database distinctly for each mode of the commerce platform), $\theta_{\text{high}}, \theta_{\text{low}} \in \mathbb{R}$ are demand thresholds defining surge and discount regions, and $\lambda_{\text{surge}}, \lambda_{\text{disc}} \in \mathbb{R}^+$ are multiplicative factors with typical values $\lambda_{\text{surge}} = 1.2$ and $\lambda_{\text{disc}} = 0.9$. This piecewise function enables rapid price adjustment in response to observed demand without requiring complex elasticity estimation or historical calibration, allowing us to expose actors within our experiments to a system with a dynamic component of pricing.
+
+For our offline experiments, we generalize a master function encompassing distinct demand estimation and pricing strategies.
+
+\begin{align}
+V_t(\cdot) = \max_{p_t} \min_{Q \in \mathcal{U}(\hat{d})}{\mathbb{E}_{d\sim Q} [p_t \times d(p_t, x_t ; \theta) + \psi V_{t+1}(\cdot)]}
+\end{align}
+
+We follow different substitutions which will serve as hyperparameters later on.
+
 \subsection{Experimental Design}
-Study methodology and approach. Data acquisition strategy. Defined objectives and success criteria. Observable metrics and KPIs
 
-\subsection{Dynamic Pricing Algorithm Analysis}
-Deep dive into how the algorithm works, different kinds and justification for chosen appraoches + agent impact modeling and quantification.
-\subsection{Reinforcement Learning Formulation}
-How do we define the state space, action space and reward function breakdown and algorithm benchmarking.
-POSSIBLY: Expand into full subsections: 3.6.1 (State-Action Space), 3.6.2 (Reward Design), 3.6.3 (Benchmarking)
+The experimentation begins with the design of goals, with careful consideration to ensure a uniform spanning across different variables within each product-architecture of either the hotel or airline platforms. Our crafted collection of goals (jobs to be done) is then tracked in a PostgreSQL database with one table to track goals and another table to track different experiment runs, and their associated goals in an experiment-goal one-to-one relationship.
+
+The purpose of this effort to gather data on interactions, is the first half of our research. With this collected data on behavioral characteristics, enhanced by our feature augmentation, we can create distribution separation into two bins $y \in \{A,H\}$ with a certain probability $p$ dependent on the session-specific features. To address the second loop of our system, we use this gained capability of discrimination to enhance the learner design involved in our surrogate dynamic pricing task which simulates an independent dynamic pricing scenario under which we can train a more controlled policy with the ability to account for true demand signals under conditions of contamination from non-human actors.
 
 
-\begin{algorithm}[t]
-\DontPrintSemicolon
-\KwIn{stepsize $\eta$, smoothing $\delta$, rank $d$}
-\For{$t=1$ \KwTo $T$}{
-  Sample $u_t$ on unit sphere; set $x_t^\prime=x_t+\delta u_t$\;
-  Set $p_t \gets U x_t^\prime$ and observe $q_t, R_t(p_t)$\;
-  $x_{t+1} \gets \Pi\_{\mathcal{X}}(x_t-\eta R_t(p_t) u_t)$\;
-}
-\caption{Online Pricing Optimization (template)}
-\end{algorithm}
+Our approach can be well summarized by a three-stage division, first we intend to observe and \textit{vectorize} the behavioral interaction data from our experiments, we then develop the separability which helps us deepen the semantic understanding of the behavioral patterns. Finally we use our newly gained learner to leverage a defensive mechanism within the simulation stage of a controlled dynamic pricing loop.
+
+\begin{figure}[ht]
+  \resizebox{\columnwidth}{!}{%
+    \input{chapters/loop_figure.tex}
+  }
+  \caption{Overview of the Dynamic Pricing Tasks.}
+\end{figure}
+
+
+Study methodology and approach. Data acquisition strategy. Defined objectives and success criteria. Observable metrics and KPIs.
+
+
+\subsection{Generative Contamination and Separability}
+
+To develop a robust pricing agent, we require a simulation environment capable of generating realistic, contaminated interaction data. We achieve this by learning from our Phantom platform data using a two-stage approach.
+
+
+
+\subsubsection{GOFAI-Based Separability}
+We employ Good Old-Fashioned AI (GOFAI) heuristics to generate initial weak labels for separability. We define a set of rule-based predicates $\phi_j: \tau \to \{0, 1\}$ to partition the dataset $\mathcal{D}$ into high-confidence sets $\mathcal{D}_H$ and $\mathcal{D}_A$. We construct distinct MDPs for each behavioral profile of humans and agents, and from those we establish $D_{KL}$. From initial findings we compute a KL divergence of $\approx 2.0236$ across transition probabilities between states, which can be seen in Figures~\ref{fig:human_mdp_viz} and~\ref{fig:agent_mdp_viz}.
+
+\begin{figure}[ht]
+    \centering
+    \includegraphics[width=0.8\textwidth]{chapters/mdp_human.pdf}
+    \caption{Markov Decision Process visualization illustrating the behavioral transition dynamics for human actions.}
+    \label{fig:human_mdp_viz}
+\end{figure}
+
+\begin{figure}[ht]
+    \centering
+    \includegraphics[width=0.8\textwidth]{chapters/mdp_agent.pdf}
+    \caption{Markov Decision Process visualization illustrating the behavioral transition dynamics for \textbf{agent} behavior profiles. The state space and transition probabilities are learned from observed session trajectories to enable generative contamination.}
+    \label{fig:agent_mdp_viz}
+  \end{figure}
+
+\subsubsection{Transition Probability Estimation}
+For both subsets, we model the session dynamics as a Markov Decision Process (MDP) and estimate the transition kernel $\mathcal{T}$. The probability of transitioning to state $s'$ given state $s$ is estimated via maximum likelihood:
+\begin{equation}
+    \hat{P}(s' \mid s) = \frac{N(s, s')}{\sum_{k \in \mathcal{S}} N(s, k)}
+\end{equation}
+where $N(s, s')$ is the count of observed transitions. This allows us to construct a \textit{Contamination Generator} $\mathcal{G}(\alpha)$. Given a clean trajectory dataset, $\mathcal{G}$ injects synthetic agent trajectories sampled from the learned transition matrix $\hat{P}_A$ until the effective mixing ratio reaches $\alpha$.
+
+\subsection{Distributionally Robust Reinforcement Learning (DR-RL)}
+
+We formulate the pricing problem as a Stackelberg Game where the Platform (Leader) sets prices $p_t$ and the Aggregate Demand (Follower) responds. However, the exact mixing parameter $\alpha$ and the demand distribution shift are non-stationary and unknown in online settings. Relying on a simple error term $\epsilon$ is insufficient. Instead, we adopt a Distributionally Robust Optimization (DRO) objective.
+
+\subsubsection{Ambiguity Set Construction}
+We define an ambiguity set $\mathcal{U}_\epsilon(\hat{P}_N)$ centered around our empirical reference distribution $\hat{P}_N$ (derived from the generator $\mathcal{G}$). We utilize the Wasserstein distance metric to define the set of plausible demand distributions the agent might face:
+\begin{equation}
+\mathcal{U}_\epsilon(\hat{P}_N) = \left\{ Q \in \mathcal{P}(\Xi) : W_p(Q, \hat{P}_N) \le \epsilon \right\}
+\end{equation}
+This set captures all distributions that are statistically close to our observed training data but allows for adversarial shifts (e.g., sudden bot spikes).
+
+\subsubsection{The Min-Max Objective}
+The robust policy $\pi^*$ is obtained by solving the maximin problem:
+\begin{equation}
+\pi^* = \arg \max_{\pi} \min_{Q \in \mathcal{U}_\epsilon} \mathbb{E}_{d \sim Q} \left[ R(p, d) - \lambda \cdot \text{COI}(p) \right]
+\end{equation}
+where $R(p, d)$ is the revenue function and $\lambda$ weighs the penalty for information leakage (COI).
+
+\subsubsection{Actor Implementation}
+In our simulation, the ``Follower'' is implemented as a set of Actors. Each Actor is initialized with a type $\theta$ which samples a specific demand curve $d(p; \theta)$ from the latent distribution. This formalization ensures that our DR-RL agent does not overfit to a single deterministic demand function but learns a policy robust to the distributional uncertainty defined by $\mathcal{U}_\epsilon$.
+
+
+As part of our reward engineering we consider the UX factor ($UX \in [0,1]$), which is our proxy for user experience degradation; this is computed as a mixture of contributions from the separability model metric of $\frac{1}{\text{Specificity}}$.
+
+\begin{figure}[ht]
+  \centering
+  \resizebox{0.5\columnwidth}{!}{%
+    \input{chapters/balance_figure.tex}
+  }
+  \caption{Introducing the UX index allows us to better distinguish the kind of impact different methods have and allows us to compare them on this Pareto-like scale.}
+\end{figure}
+
+We also need to consider a policy such as taxation of agents through strategy-proof mechanism design, specifically the Vickrey-Clarke-Groves (VCG) payment rule. We link and prove that this would create an incentive for the dominant strategy to become truth-telling.
+
+\section{Heuristics as part of neuro-inspired steering systems}
+
+Steve Burns, superior colliculus (face heuristics): we create this sort of part of the `brain' + amortized inference.
+
+We could say that a DQN, for example, is the learning subsystem, and then within our reward mechanism or some other computational method we introduce a steering subsystem which acts as the proposed ``pricing heuristic'' against the given non-human transaction data.
+
+\section{Market construction}
diff --git a/paper/src/chapters/05-discussion.tex b/paper/src/chapters/05-discussion.tex
index a2052a1..6cd6362 100644
--- a/paper/src/chapters/05-discussion.tex
+++ b/paper/src/chapters/05-discussion.tex
@@ -1,5 +1,15 @@
 \section{Discussion}
 
+\subsection{Transition to Agentic Market Microstructure}
+
+Our analysis of the interaction dynamics between the platform and non-human actors suggests that the current static pricing models are insufficient for an agent-mediated economy. If we assume a transition toward a direct revelation mechanism, where actors must reveal their true valuation of a good through bidding dynamics, we inevitably introduce significant stochasticity into the pricing system. Unlike traditional e-commerce where prices are relatively sticky, such a mechanism implies a high volatility characteristic of financial equity markets (without the fungibility, however).
+
+However, e-commerce commodities differ fundamentally from financial securities: they possess a hard floor defined by unit economics and reservation prices. While the market might react enthusiastically to an iPhone priced at \$1, such a transaction is not permissible. The platform must establish an initial valuation anchor ($P_{0}$) defined by the marginal cost plus a target margin, around which the market price is permitted to fluctuate. We propose the introduction of GenAI Agents as Institutional Market Makers.
+
+This is also under the assumption of expected transactional capabilities being given to AI Agents.
+
+
+
 \subsection{Risk Assessment and Limitations}
 
 Acknowledge risks and constraints and data sizes.
diff --git a/paper/src/chapters/06-conclusion.tex b/paper/src/chapters/06-conclusion.tex
index f923a49..c698e82 100644
--- a/paper/src/chapters/06-conclusion.tex
+++ b/paper/src/chapters/06-conclusion.tex
@@ -1,6 +1,6 @@
 \section{Conclusion}
 
-\subsection{Summary of contributions }
+\subsection{Summary of contributions}
 Restate the thesis and key findings with validation of research objectives.
 
 \subsection{Future Works and Next Steps}
diff --git a/paper/src/chapters/balance_figure.tex b/paper/src/chapters/balance_figure.tex
new file mode 100644
index 0000000..5565ba0
--- /dev/null
+++ b/paper/src/chapters/balance_figure.tex
@@ -0,0 +1,38 @@
+
+\begin{tikzpicture}[
+    % Styles for consistency
+    axis/.style={->, >=Stealth, line width=1.2pt, color=black!85},
+    curve/.style={color=black, line width=2.5pt},
+    point/.style={circle, fill=black, inner sep=0pt, minimum size=6pt},
+    label_text/.style={font=\large, align=center, color=black},
+    annotation_line/.style={thick, -, color=black!60}
+]
+
+    % Define Radius
+    \def\R{5}
+
+    % Draw Axes
+    % Extended slightly beyond radius (\R + 1)
+    \draw[axis] (0,0) -- (\R+1.5,0) node[midway, below=10pt, font=\bfseries\large] {UX Index};
+    \draw[axis] (0,0) -- (0,\R+1.5) node[midway, left=15pt, rotate=90, font=\bfseries\large] {Performance};
+
+    % Draw Perfect 1/4 Circle
+    % Syntax: arc (start_angle : end_angle : radius)
+    \draw[curve] (0,\R) arc (90:0:\R);
+
+    % 1. Paranoid (High Performance side) -> Angle 75 degrees
+    \node[point] (p1) at (75:\R) {};
+    \node[label_text, above right=0.1cm and 0.1cm of p1] (l1) {Paranoid};
+    \draw[annotation_line] (l1) -- (p1);
+
+    % 2. Perfect Detection (Exact Middle) -> Angle 45 degrees
+    \node[point] (p2) at (45:\R) {};
+    \node[label_text, above right=0.2cm and 0.2cm of p2] (l2) {Perfect Detection};
+    \draw[annotation_line] (l2) -- (p2);
+
+    % 3. No Detection (High UX side) -> Angle 15 degrees
+    \node[point] (p3) at (15:\R) {};
+    \node[label_text, right=0.5cm of p3] (l3) {No Detection};
+    \draw[annotation_line] (l3) -- (p3);
+
+\end{tikzpicture}
diff --git a/paper/src/chapters/feature_table.tex b/paper/src/chapters/feature_table.tex
new file mode 100644
index 0000000..302f2db
--- /dev/null
+++ b/paper/src/chapters/feature_table.tex
@@ -0,0 +1,65 @@
+\begin{table}[ht]
+\centering
+\small
+\resizebox{\columnwidth}{!}{%
+\begin{tabular}{p{4.5cm}p{1.5cm}p{6cm}}
+\hline
+\textbf{Feature} & \textbf{Type} & \textbf{Description} \\
+\hline
+\multicolumn{3}{l}{\textit{Session Identifiers}} \\
+sessionId & object & Unique identifier for user session \\
+experimentId & object & Experiment run identifier \\
+\hline
+\multicolumn{3}{l}{\textit{Temporal Features}} \\
+session\_duration\_sec & float & Total session duration in seconds \\
+avg\_time\_between\_events & float & Mean inter-event time \\
+std\_time\_between\_events & float & Standard deviation of inter-event times \\
+min\_time\_between\_events & float & Minimum time between consecutive events \\
+session\_start\_hour & int & Hour of day when session started \\
+\hline
+\multicolumn{3}{l}{\textit{Interaction Metrics}} \\
+total\_interactions & int & Count of all user interactions \\
+total\_events & int & Total number of tracked events \\
+interaction\_velocity & float & Rate of interactions per time unit \\
+max\_velocity\_5min & int & Peak interaction count in any 5-minute window \\
+\hline
+\multicolumn{3}{l}{\textit{Navigation Behavior}} \\
+unique\_pages & int & Number of distinct pages visited \\
+page\_views & int & Total page view events \\
+\hline
+\multicolumn{3}{l}{\textit{Product Engagement}} \\
+item\_views & int & Number of product detail views \\
+unique\_products\_viewed & int & Count of distinct products examined \\
+product\_view\_depth & int & Repeat views of same products \\
+\hline
+\multicolumn{3}{l}{\textit{Conversion Funnel}} \\
+cart\_adds & int & Number of items added to cart \\
+purchases & int & Completed transactions \\
+cart\_to\_view\_ratio & float & Ratio of cart additions to item views \\
+conversion\_rate & float & Purchase to view conversion \\
+\hline
+\multicolumn{3}{l}{\textit{Interaction Quality}} \\
+hover\_events & int & Mouse hover event count \\
+hover\_intensity & float & Hover events per interaction \\
+\hline
+\multicolumn{3}{l}{\textit{Price Behavior}} \\
+avg\_price\_seen & float & Mean price across viewed products \\
+min\_price\_seen & float & Lowest price encountered \\
+max\_price\_seen & float & Highest price encountered \\
+price\_range & float & Difference between max and min prices seen \\
+\hline
+\multicolumn{3}{l}{\textit{Technical Fingerprinting}} \\
+is\_headless & bool & Headless browser detection flag \\
+is\_automation & bool & Automation framework detection flag \\
+browser\_family & object & Browser type classification \\
+\hline
+\multicolumn{3}{l}{\textit{Experimental Labels}} \\
+is\_agent & bool & Ground truth agent classification \\
+xp\_human\_only & bool & Human-only experiment indicator \\
+xp\_market\_mode & object & Market context (hotel/airline) \\
+\hline
+\end{tabular}%
+}
+\caption{Feature matrix schema for session-level behavioral classification (32 features total).}
+\label{tab:features}
+\end{table}
diff --git a/paper/src/chapters/loop_figure.tex b/paper/src/chapters/loop_figure.tex
new file mode 100644
index 0000000..e90e018
--- /dev/null
+++ b/paper/src/chapters/loop_figure.tex
@@ -0,0 +1,110 @@
+\definecolor{mygreenfill}{RGB}{169, 234, 186}
+\definecolor{mygreenborder}{RGB}{29, 145, 61}
+\definecolor{mybluefill}{RGB}{204, 222, 255}
+\definecolor{myblueborder}{RGB}{66, 106, 189}
+\definecolor{mygray}{RGB}{150, 150, 150}
+
+
+
+\begin{tikzpicture}[
+    node distance=2cm,
+    % Style for Green Nodes
+    greenbox/.style={
+        rectangle,
+        draw=mygreenborder,
+        fill=mygreenfill,
+        line width=1.2pt,
+        align=center,
+        minimum height=1cm
+    },
+    % Style for Blue Nodes
+    bluebox/.style={
+        rectangle,
+        draw=myblueborder,
+        fill=mybluefill,
+        line width=1.2pt,
+        align=center,
+        minimum height=1cm
+    },
+    % Style for Arrows
+    myarrow/.style={
+        ->,
+        >={Stealth[length=3mm, width=2mm]},
+        draw=black!80,
+        line width=1.2pt,
+        rounded corners=5pt
+    },
+    % Style for Background Dashed Ellipses
+    dashedloop/.style={
+        dashed,
+        draw=mygray,
+        line width=1pt
+    }
+]
+
+    % --- Coordinate Layout ---
+    % Nodes are placed at absolute coordinates around the origin (0, 0)
+
+    % Left Loop (Green) Nodes
+    \node[greenbox, minimum width=3.5cm] (commerce) at (-3.5, 2) {Commerce Experiment};
+    \node[greenbox, minimum width=1.5cm] (raw) at (-6.5, 0) {Raw\\Logs};
+    \node[greenbox, minimum width=1.5cm] (features) at (-4, -2.5) {Features};
+    \node[greenbox, minimum width=2.5cm] (classification) at (-1, -0.5) {Classification\\Training A/H};
+
+    % Right Loop (Blue) Nodes
+    \node[bluebox, minimum width=2.5cm] (trainedpricing) at (3.2, 2) {Trained Pricing};
+    \node[bluebox, minimum width=2.5cm] (policy) at (6.5, 0) {Trained Pricing\\Policy};
+    \node[bluebox, minimum width=2.5cm] (rlgym) at (3.2, -2.2) {RL Gym\\Training};
+
+    % --- Background Dashed Loops ---
+    \begin{scope}[on background layer]
+        % Left Loop Ellipse
+        \draw[dashedloop] (-3.5, 0) ellipse (3.5cm and 2.8cm);
+        % Right Loop Ellipse
+        \draw[dashedloop] (3.5, 0) ellipse (3.5cm and 2.8cm);
+    \end{scope}
+
+    % --- Arrows: Loop One (Green) ---
+    % Commerce -> Raw Logs
+    \draw[myarrow] (commerce.west) to[out=180, in=90] (raw.north);
+
+    % Raw Logs -> Features
+    \draw[myarrow] (raw.south) to[out=270, in=180] (features.west);
+
+    % Features -> Classification
+    \draw[myarrow] (features.east) to[out=0, in=250] (classification.south);
+
+    % Classification -> Commerce (Closing the loop)
+    \draw[myarrow] (classification.north) to[out=110, in=0] (commerce.east);
+
+    % --- Arrows: Loop Two (Blue) ---
+    % Classification (Green) -> RL Gym (Blue) - Crossing over
+    \draw[myarrow] (classification.east) to[out=0, in=180] (rlgym.west);
+
+    % RL Gym -> Policy
+    \draw[myarrow] (rlgym.east) to[out=0, in=270] (policy.south);
+
+    % Policy -> Trained Pricing
+    \draw[myarrow] (policy.north) to[out=90, in=0] (trainedpricing.east);
+
+    % Trained Pricing -> Commerce (Crossing back)
+    \draw[myarrow] (trainedpricing.west) -- node[above, font=\small, yshift=2pt] {New Pricing} (commerce.east);
+
+    % --- Text Labels ---
+
+    % Loop One Label
+    \node[align=center] at (-3.8, 0) {Loop One:\\Data \textit{(Online)}};
+
+    % Loop Two Label
+    \node[align=center] at (3.5, 0) {Loop Two:\\Defense Gym \textit{(Offline)}};
+
+    % Bottom Legend
+    \node[font=\small] (taskA) at (-4, -4) {Dynamic Pricing Task A};
+    \node[font=\small] (taskB) at (4, -4) {Dynamic Pricing Task B};
+    \node[font=\small] (indep) at (0, -4) {Independent};
+
+    % Arrows for bottom legend
+    \draw[->, >=Stealth, thick, darkgray] (indep.west) -- (taskA.east);
+    \draw[->, >=Stealth, thick, darkgray] (indep.east) -- (taskB.west);
+
+\end{tikzpicture}
diff --git a/paper/src/chapters/mdp_agent.pdf b/paper/src/chapters/mdp_agent.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..0566be9e926934d4b553b90623800f40acac21ea
GIT binary patch
literal 10743
zcmbVy1z23mvTpD|un=4a2oQV*cXtWy5Oi<}Fu1$Bg<!!01b26Lf(CaDZb2_(%h~5U
z_uY5jo0)H|)zwv9-PJW!J@XfZyoeY*h=Co6qWWjaEfNa=2(U3QN8;fDFv@_fO(3QK
z)+dr85&!^T6f?7gfbE{{miiE|2-wiZ2#mzXhhz`21M6EMIj5$n4@SPNecd#pHd%fP
z#AJ>eEqc-E{ep;YR2Dtl!J;a*$*@1x^1g*Uw%hz*vZ_X0{lGd#mQtg=uTQTZ|72V6
zST3~ULweF9-S}<G&Hl|?V_nOmL(GNKxra|HKZCAUWe@s{4Nk~@Qqd%KR=i4e0EUA{
z%Vj)9;f9JLN>+t=Dekyh2yfTy<t4ZnRF$68Ox5kdL{i&_!<N$<-C(Vx+RwX@e)&nu
zt$n5bfX`@tI{esfJ?Y|T-s17p^Zw2@#y?m=>Bq`kiiRJnRaVhwxZ<g?U)N3Umy5#g
zZKyN7M~S6!kKFvGT=&Q4xBMfDa{PCJ#@xKT8LxNzOB`>0EplcoWsn@4LmswH!ZsPB
z)F$aAO^ZtuC*b#%zEiyW2#RR0gBzFELV#dP9l6~?!&(G;)JIBi8hJgw*0-^-&@SOs
z6-E#u@1Ri>imf@Yuq65fass(s>R7-2(9*kzYr#M|ljA!j)rxpJ>VLFY|DMoFpwZCl
zg=*o-F`^LKL~LbsQ0xg3F9KbKROd9U&vZ1#&iBDDviooHUUtO8@p8Tt`)XhWbM+aW
zp_(uh587|YfTZ=T<#o0Hv2){R-|p}TbdUUx!-9r7{CYBo9L2Beu+H6E3Aa5wNwU^?
z?ZyviS>(wQ$+PS{zvc_DkY}txwPPra2_5c>4L9ebU~FJO3Fr(1-t<osN}GPZWIN>`
zxXc<{N{DWOPai^ebJN<WxB^=AZ_&ek3KV`Paa}(!?L%!(QfYE%AL#sx(kYI)K-P{X
z*fT7ZBz-AuemCh%nCqUP>VS&Ui}N0WZD`YSnt=R31-e>(kS>0=j<eHKf!I6f2Ouk_
zTo$4a!1uw4#7nmNLgQcTq^2Y*Ol#s=R6SO~tYJ|m6Csd<DPi|56U%<<A~x;3V7lGR
zba!9-QekZ{^fFlqlRUbb6~>k`N6Y>j1#AAP{*N>n_@zwYb}-#ob_<f%Lh?6vDTEg(
zRnY|1ZETdI#DSX(v`kHQcu1ttMFR5XbMxP{(b*_`wbXptl&Pj(g68dG2CIXRM?3=6
z;WR@nbuU$FP)jO)8eSf7FAFuf$2BHv<4=m%TTRwG#`SM(*NRL$CU-$T_;&bBf7pVz
zZ#N*yW-uA%OVKD>+0Op#3?~{xGa~sFaSC%%sl8D^MLMe?D?|#0Cc$?PYdAU772+2I
zmmCe)!bQx@mcqslM5^gNn7mtwVLC&IL3}~&lIlc7Rc}GRX0(bFpm-~uO6N|~o0CY`
zfUUNlsGfoGp=#2$Mn^$0W{NtJX`~8tchoVP7XjKKsEC9^8uC5X=>LlKMJ_DUL@NR{
z&>;Y;FqZ&V6Q0jQm6Z$q`en632L9`+q0TeKu55reBh?}ziO5SWhLICZ-%OP9Ynbbe
zi#26i>q+|I6$e)StM?(o;QkNO(!Ou;^pxA+`G&~!YEp(|PK1c_UUEiE62$AL=B<VY
z0HJl^)o|a$XKH1s_nTVgn)f9uXT=oYDd*vt537JsX;sxPC*-?3n`#YBqbCVz>G$%1
zJOc*pb11gd944b_LwhfTQ3idGwCG&#F{fo`Lz+v>=Kw>##UcXl05;HON(g&KgiWSl
zzch!hr~?i3Sk6NWkf=1oL@|`JzGA78LRm$td_`zpcxy$-06<|E{{d}yhFXF=owo%|
z{*7Cb?MF+hBh?P#91eR15|Iy$XdHjc6?F6Q+gJ{w$LcVf7e0GH@tki22Dn|3vPqs+
zy3>((hegIX*DE8ofwS|rHz#v2bp;GsuuD}n$3rlIOW503me!Uj_OrK(oB5er0=?QJ
zYgS27G_miO`P05T-{8;=w^QGTieEr0Uxl7YwVN3MJji2#So-7Qxg$Mp26jJ9_1tW;
z^kJ9sNF^0W+mkki+|!&<xX2Scl^3W<8E|zX&id%bzLA#S8ABC>B&Ip{nmJ*Vk}}%a
zCvSjXMTW(mQon&H%cx07dM>W&N4f=X)1deSy9qW(s+zN{;w_5yFzLLFT+TVpLp13m
zjbSu@@YAjN0bD>z>7bpm!jU8?k~dBtD~qun(|2>Gy<Ultc#V~waKLuD?qbvW+H8{B
zJ?Er9GRc8iLXCjEM|&HbmX0P*VXv?T4lA0)+}jh=2bhlCK<S+~e$7<Jv#JaC7lNp1
zCNvB?tK6<2%cP7-dB!8Mt4MJ`iEw_o)|dX^_ac#!XwB)<)n(|q_=5?QGUjAzG;URS
zM@sl~iw6yUiRNOzfPX5&8F$xo)3q!DjxYEUw@y<JKcG<x@8=r3K;Ym>-l`AmiSP^f
z%00Zcvm=ogyp&75^wy~FTzMe}eCcPG1Ty)^<K~`7HEM^9(HlP$5PA~!@Md7eLv-O+
zoSCr4m8U298Wf!tX&J;ir|Ir6$+l|6b5q-#VO*zWD}<_CMeBSt4C)kW!o1`_N=~8>
zgeiW*F-@@N?AS&OVeOOi5?H;vE)KQbr|nrze1z%Vq2xgVTO0kEQlFlllksx~e!f5D
zU=Rlz%QM4sllkvl{X090Izz;jAWxYYz{B%QczQAF1D_fIMqxn!2*7Bl|8#xI*H0aP
zTp7h|tRes4Hl_#C0|D%R<@+b5r_Z0tIQ~`!U{rSb2nI098JH_UtbVKh^Q6G}mn7qR
z0O)szn3<hD1i<p=^Z$H$U{(UIgl>gyLcf6LfYSL-D?E?>mlfFlnidH3k9qw$4G`1c
z^Z4&hEYEiNgA)Yc;A8>*+Yx5b<&`t$Ou|;P!=5AWjmqK_T0uaLHLD`|xa(NOixYG(
z9T1J2*1J5M+*ANV2pVRUo-S;5l|I@<-8&oC7tckAkX&M@&)ZMN$g<b*Q@HIl;d~rN
zA6i9d;++0HrwFE7p(b=iWpdi0O{>k-e9K<RZK2log8j&2h89^oI$4}7ti;Fh3OHIm
zzB3_g(g$Lpy85u`64V;ZhiiCJs0j3iR^#q9u&(t@lC_W`!@3SP4&$Qy*-r9wZ}|h$
z5_3b1tKd{VK{E~_9!76VO!aK03<jJ2Eubc7nnA1Cb6)Zn$TFkzaxZrfhu-;`grr_l
z!W^ommtL#P6HeC{FYIUTENaR1`-@Ok0y<s8#$<H1sE;{fHk%f+pm<e*Ov^j1u2XC*
z4T|2lyTid|@?g9AD?1yXJ*0Xj;SzZixf?Y>smE>CqLu5dR`=dm*&Qo);^i*!U_Xd?
zcSTo7&`;FHkF#VP$BJY~{ut@Di5OD-$Ud_o@Ku6=Y-?rGS$eC~F{!E}1=^mYn~cPQ
zr?<TqH-;r^zPpt)umSh|ByTWJ<U&fvUFp{2-zjMp$d<q4BPD|k(C8r3F9rl>y~{nO
z9-?*Gl!@)!E+eECAF&Omgd4y-{c<nZo1N29{%MkH`qW~MYtYrUx$46Q8xloH8etY~
z%+tW!u7HV@n?RKFAGTbnsQfZo)4K#K+8+1CGKcFoHxG&|6v=`7wvGwvAxSb*cYt~V
z9;?vecEL7-s2^m7%zHG))a<SX_q;u9TUR|dZj7yJ9}ByqD!%U$EzMz-zFGdsus}$H
zZB=D4xzjs&D~8?U{vnDYH6|C^Rai8D0duDq;q;97)2EEbHZRxBX%cQOQ+lmrHO<dS
z{I+Ylfm2iML#~~XwWX)_*NSVT)HyQJ>moKVuhGh}wG?0dYH6IqCLxnrdLfw!Wio=k
zDQ1H_d_$~Ro8MZiHtrVB=Va@%g_>txAY1S;L^~^~PI^h2`Wr?7iVJT`$84WM04n|n
zK=dl61NZf5nf2>y&U;;A5B<)o?^oiF=RK_V_ItOTt@4Kz;L)1AW+d@0S`v2F^qnsO
zuKdk%t#+qF`}|%gM6VrCUmN`5ya^9-w20C|D<W3mqw6PbidP2{CL!Cb`mCeA$`U|(
zLlf$&xu#&{n6kFEK;hVE6-c9muH*RXBg*^qDh<)7J=FuYqHt?CLW=iW$60F0?^#Q{
zQg>A!T6fiYj+%LQOl&h&O=^8^3gl3~3Y#et=V4Er*p+Ka<@YO1iEp}?W9g7FDyU^k
zvw!6_$_RL=ym6qno%~J$Tcd1%ElivONreMT`GbC_=<B;IGMZSeOZ7NQK9vij7=0G4
zf!NMzLW<sm0%Mn+=yC`&I~3*dn_1XFSp9GgRhq=!A7b<3jlEyH)BBAu(%!z0r5uba
zR<cq$P>QE6p+2PEGa9Xx;H2Lk$V_E4*-W&{)TT=^9;htFI^^7=ldHEO4f)QY2B+el
z+D&gu%FvS=V3$xWrn3`rI~{!ifth(3aR*=1ULk0TUVV--q3M$A5Mfp9QEXUjpH?wa
zJ<_bHvpq*@mDK3I5Au9lNJ-ZFGE)S-0JB7X^XBZl(c<Dz_(t}A^!A*6W_<L+VYW5j
zgqP24TQCKT^w8`;07fTcaQeU-54^RQGnI;fQ~yD)QP@oA&{gjpKfldQL=t?rTN;sB
zsWqx<%|czu(B|&Wu$=aeYL_)h-?c1-5K-?9Dr)wgiyl{oRy9jkp6N!c=6MxuZEqh_
zc4$zDX`LR1WWypDK5GjM{ifMhZ3v^H>uw`&BXOg%-;TmuV}}M3&_p`J3yM)W-yP2`
z9j|x2^@z)t*>DVPydr!cHW$s`&EL0CvG&-B`Q=t~a1%qyycP&VTd(rIlW63}mH*iU
z=71tqnL~~VJg6CxkL?}s59BdLy?q<|ZD8FBZ~RMeutD+)Ne1DG*c4)C7H50kn5k;}
z%PySBeK9$&(TF8txUfMhuG_j6^$h;JD{7Cx-t19}X~gnh<(uUuhD0Do951m?*UZq;
z&|WO8x+Aq)w`Xw-{Yl@F?vi2~3=}EoGvR6KF}xwV*zgaJW%{2MQwL12?-$=N5?RdD
zlb>u7w7QAlAe3lxXQ{2eQjx0>?*8nM<ms_*phJ}OYb}yF|NO;tR!}XaInb?t-+`8m
ztR!M;`#7ymmkon5%2Yv0y8u`77UmM(L`+jye(A^zwaZVAdp<BvJOuq1CxCEVIY8t9
zP8cOV?4ry1oQBu*a_NIV*8&<GJKR-H-HM59GEp+&p(ylXsEi7kA6H#7F)9jy<X5q%
zpB78iCZi==9%)p&u`!HR2M=2*hY^es?wixqaSQPjI!AY_adKXdgKGQpMO*u=O)g4a
zCY}lDiu}QY+zOt_$mDWnzo<zqm=r9lNL?~!vjQE1oN}v;Sc`1mqB11M$Xf^U;(v?{
zPJm-_=bRgeur|`2ga$j}5aj*bfBFgkAhdhifVMcJV)^A#p2Eas<VIhI)4j8d;YYWF
zAp7!g1O4KXwQg-IZ2@gPtseWwroOU-f^X)92_JQGQssqQNVSbVqn$ohc?{M}6*)Q(
ze~*gbSM)k1x#i1RXc?B{DISVy<=XzvXf|hJL6FGboRZ#nA?Hw(C(Gxwar9npv8>*F
zHR0eODq@!9G}PdMW-sl1R*o2LI0ltU0;ymhB}&2Et;z{p`*cUp6;<}(9h&02U9L7M
zJwD-Dcy!h`Z{R>uoW=EJg#V$_@YSd;@+_KD=a>$ei_{0D58}j8H}x`0;h0!?nb_pL
zO2YglJACYg{+_G3h$gb?DU1iLH919Ohf5RA=id5b{9b32`0>+w2v@8}OO+;0XM7wp
zkJsB?BLg=&R(B)~T7_HR71(ViV$O<w>|bo1wcg$5XWXr(F}w)^KKc+GaQ$#@X7evK
zz=l`BW`hA%>nBo>>4?5<50EfcVQ_M~a!ToML-Eh>Aq*04SdFm;z5|oLE2hNO2ys9U
z%K;!xxKrzHMYL6awjXM%#<w5l%*=`o!vf>?`Ckw2lZ@qZztin~yu=wjU$Xovy|PJA
zTRPz+NwTyt{#G5(ZG|ml8JfJgxI>TYAa{DwBix-mFd&A-ugu->&GP(f1zw<RZ5Ctg
zmvmK;g|fU@^OW2l&0W}X&<iU4YgJ?;S^dc2-Cy=;`@-vWki=eZD=s`BuPKIiL3!1u
z1)&o27`Z>HJdRFYl(K)#(#?<=+@<46N!%y88N87@j}$!zc=$5pBV1^DM75}u%IfhU
zF@0Yn#TaL)%@x${kAsgClN9lOM;rYHaYmpapg-{<-AJ14N~R}vwsMI3t7sISPk4-u
znp$sF15|1t+L%{dpS@<_y8`6zA@Df*Eyj(LE9R90WJ%KTU$w$#vm$*rV9|9CRgm{D
zFS^geaK;rFKeJ7X(Nf;he!)FBQgIK}l^p0L_ZDM)4-+w?UZ8en=1Jcl7PS@LFNnWE
zm*5m2#}**h6N{eS&|wUJX;X&7_tg;mz4qs?<LO)Y*+`zH=*HmD=~mCW1MOZw6+@{L
z@=o7XW@Ld;KqA#FFBCCz+AtGKSq)}wr^G}G)*5~(YKUg40;>T@r#MXF%Md#_GnlS8
z0Fj^VO|;ET#2AEpK6OOSM(Ri+sBtYnc`aXGGFaESwC^!c#yExLWNjky(vZuGqecIF
zBDvV-+nWh0&tC*_Onr=(srS_8%-*a!Ne*9wR+Bo%8MQ1EvB@`OeZ6ayJN&Oobn$T&
zk1O>~PmM$#h6E?YWscD(Yd)Z@nqQG{lzBb+_}#Wlw0`NIDH%7qAME!!*sXUg&fks-
z1Ex%w$vh^M>>s6!_VV~2iDaDXsos7`#4})EjIO#jnD9if@=nNUe~pu}NDXBXGa%Z`
zFy#7*=`cEOwTQPlP#dNRQiwQ8l8_z#({^!Cq)zmlIPdiG0qe~!!cMGUB$NB~bVZ}`
z<OlH?Wcf*#&ckf^r6`3^qYAOX<3>8R#-MpR3ewzIHtLbMuOyXu?}*leX<lo2z7S<q
z47UwT8{X*eqX`XrGZz;k2KA;K`Yq5~o!>%gS7P-LmtPP6>2ELTYwI228_<Q!&L)yQ
z$wa8y`Sz=*=?wRRw2_Qu@5Em16Q2fqhr#9g4y>7F*Tas=ZBMDv&+27w?rV`xOkXBv
z23QiWRmDwdt9<!PUtVhOa=^wPQ?N|NWfag*T+o->qo4{3{%$l;dJ7a}suUPDHyV$#
zisOxI7IVa8V^6<w{VMt;&}3~oX{@V7X)yFA%Dhw|=%MIsN=t%POP&{xN2SAeG{>r{
z6WRi8o<lSozG=o^w%?TaYr-9CAHcmn>y0S<$=;`qUz!MQV>&e{)hPNgietfM@CjB%
z2{3X*MH>`tAx!~w2P;*fsao~6E`+f?9P!NM_etX~{DKTUkH5q+W)7yJN(s7+K_SL|
zTGCIsuZvk+i?Y95H-)b?gJ)0lF=FhOf`PE4Nx8h*UJvXiuOC14Tfo$4^G|Qc5gdNA
zYfXApGSa@Qg*9VSyyd8yQ*bl2e@+Q|(>g})z+#WLu#>cZ5rwdRvM0d9^9}sV8E&1_
zrIpvCb;VpBMC@PJoNO=Nj3k2I5Ecl#K}ZfjrebbBD20KR<oo8I(m|yE_ScZ$DBfGg
zPOmRJ#OB+txX=BZ@b0lRzjOBOI5_-tGD{sSxgBhjh7VpirxHF??uvOqJpsKD-3oVr
zU^!Z^nPVKB&tYc;wT7r}rzk%$c%(z@9p|}zuKm&c7rLM6VR{rwJ;rq^2jy`=jMpJ>
z?Cr72#r)L8lFlfP)3LyQ!y**d>NPcN{5(X&)5o*f<$IE@`^i*X1fuqQ<Z0Ds_Y_HN
zGdwnf0>@Z$o12L$l-0d>iX~@N`S)i&-9M`}-X<Ci3-!AmF_o1EwC*y$e5X_)={VCi
z{+*(bZK{L=Ge4_Nd#=XPO~X~AjBewBmIsy`8X1V3(nTZ`(Ud1fc-2wjhu~CQMeJUD
zzM0#a{YAIYQM&a#)x=Pcu8Vu_&s{2pJNMJvS2zsK_cvOtt0i(7dCS*(G!u95Tv689
zr8&7CIy(7}i3_VF=W)sv+F|fKz8>Blst-q%QHauy`Zu=s;yb;tq;C|r7drXrw*&)_
zx8B|*eHCZ2nxMH0q8N9Y-H#L5*6*g4%33db8&Vi#?<S4ey-R1y;lX!=oS_7Zs)R_p
ztf!H@q2O&%R#T4%2Dqr?%huGn`_7A^hXuS06N1VrgKC#jOKQZWc3erFnGbWlqI~ay
zC?V0**tV56gLiFWl$4)J-7^@zjFhzz@1C_x1)mC~oA~MH&}D=p_HF3BQmTEIcv@I@
z_egh(<|CJFO6in~rpy|JAojPRo&#(ITlLf|^NW{!wK)TYQ^#*>g>9YFsMFt;F0;$v
zQ=(NGb5^9)OGVQ5@-Do&Fupxi68j<cL!2u#fvkF#n-pB3Y))I9#6?*UC5dGhaG|hG
zI74_vSgl%EJ~fNOU6>Y@09OKc7+_wV--9Cu(*3SM5Pwl}aae8c=ak>@y>hW}aVB<R
z&r;<>+(@6vx3reIV9hy5TbW<!8Ad)k)VRfu;s-|~nI8_po{pMFdADeb4Yu;FW%-Lh
ztJZ{vFD)M)VWXHG&ZmlR_>&!V9Bye+g?RcXNzAUYx6x1%X1|&3YMYmJnOBaK{qQAS
z68DmbEK3YN3R)tE8sZ{5N&PZ~EroSozch)Fk9+c-cwm#OKZo#)ZYIu~)v3H+5Z)U%
z`E<s^>0ldZW%7_U?AwWwq9QUu-Y)Sj_BG;Y!nWr2kF6iud7~&JD8s{FU)o46V=Q3&
zh%KjB_`HN~iBfG`6*&KW-tTATW%%XCLbfKeYs68+BgAFI$B>*i{oB0T&D$Q^_uFwJ
zRNNnX#ZXccfyZZN#Pm#~FJLmQR|L3ZvqQ++3%NXJBB?Ucy-d$8BU)WNE|Wza?rK<z
zW`-9VuG+R3mvt6+FH;;WXh~9dOhl~D-B{h`vpIF?Kk(Xb!7LB-4=-2WwaI0LsGAAP
zSP01s3XTvn<f`Tu6)jFZ>_JQ}%@xyBP^42&R@+&qSySob%DOSiW_g%w?K|I4tqR^S
zwD^6*LBTkpWsny~CHw#z&VWY36m{6r4x!eDP}yK~*c8_@9j%u})jG5kc@=n7ZaK+)
zi`I2n9qa#2bY@8Po~O}a-~JNo8|&w*n`IjhR@}PNb=^k>RSD+wMX%Fek!lu~2Oh@T
zdHFK3ZO&Svmj2rV$DhS84{W0;?FgbkuXbE-Fc$}PHuw;Y;a>R~`Z5MFSue9Kwa1Y|
zC?nRL*W2ns*Snp19xr>_O67LWsv=svH~R$YgEy^iE?eF5NhwGj*B657V{!H+ALD*K
z?vn)SA}YP&KUw9&eT0$}gD3CHMI~EVstXRLr;;Sa&cc60zvgFXf21OziBqYwPutL;
zBhmXwswl1+TfFgdnxBeb_<`Md{N3!?YvwIa0WLGPk@aw)vyd3xJHi?2uJTb^Pn4Rd
z=zy(jS?6WC(azfit`0%Y>49PH$msXDgJ;K{YZglv40v)Q(d%Mq&2pNyOEPwFgCg1<
z_?+T=S)<X0=fV>bjxj2E3Q7e-d?FfqeL|O+8YOWyDROK#d$U1(@!}FhdoHMI{E0jH
za}dg|%s|yIfrPxMN@Y&W1?%hTWX9C27t%Xzwge}?U~Xcs>TT@j-2I7rw{b_ENoM%p
z_hdJ*zT3sTvkScY7(oa{juAUYVDuR{Y|we$5;N2@9w9H0QJWLXVdg(qCxmzTlMH;0
z#0TfMRTXg3!;hIInCmx~syUVEQABoqX_&dN<28d!BUI)W?=;H}%WbpFP`aWtYe-}%
zfnXF$<ZgUj*~)sYVGVJ05iMI!lgx!FdLNo(u>1|_8$SI(&x2%c{T8<!c}0Uw^DJ<g
z^RP7*GJc-IoS%2ZRH`GJfBw_8@GiOxIVFp88A2!I4|b&q+~Oo$X5fdI7#+tAtVf=t
z*eq{o_OUz=7L>b-2nEXOW8%?P#hTHjg>NpuWpysr_q2<fmt1PwS54)|=4(57xpZY=
z?@l)@zB>E<?mckpj9}QUCdHnw!$Gl+#<F(x6z_UoxYt&x&PAK*#t?yIGwM{AnWRW>
zSf-Y)im-YCa|<!Qo6t7wnzlfMi{_{%vZf{(KPOfkU7wOP>tir8{xqTu_zc<oaD&aP
z)k``kjHYRhH@;Z|&c@vRO8z>4h@t7VK2G6E#7l@u$1gDslYmN9j<e4}X(8<v#5>{b
z{5u~tgVY?Z3sCbR9CIfCZkIP)#Qtx%nkHWIkFSCe?yl=1?q^|ot&X#C?>w-Y;^BJ@
zj@QO%F0)X@i&lb)Q7=1XI4V9Ei!-erL!1SDNHzrLYgnf9I<GR+X0Mc1UEolOAZ5bI
zAJMmoAZ>yX{NC)u$8A2c?nN+~tPk}vvyaWtHM&6_ZYnj5Z!QnZE1PK!e&&3pJaJ1;
zm?P#s*v%118Q`bwtkiKIJQbU^!*=wV8E|aWtL-`J_)!8HYdWK|nAYEpl&U$SJ6PyC
znoaL_RPI2HSdO%_8uho*(y`RSx7=@9%w627ZO~)pbG65EFMW}I(*C{pSclfKu+Sg{
zVyanKmS^N#INqKfDwqwEzOZ8?S6DWVh*DhE4z~|huVw3-Ui7cF2rR1>RA+`&U6(ET
zu9twiJg&cq#I04(iNyVF?0PfexT^x=QiPX|)+hIZtwz}=H|5%uVA8^!h_B+1lCFSv
zFmXq|u`YHhJ>ZAz-P&iFnmP<@3wK#c<@KDs6nPc6u%<HkAULkB?d|e9)Uz|abkqbc
zcQ-DcrI+>&&fhR;AkNA`oF=QUiHud+WzgJd;WyE^HC)Jf4*V-_t3!^XvGP>fCy`@h
ztpcb)&ghtzUy$M!fgy2oIY#e11i-l$$8913yk9!HZi_9mpsy@28kdni!+tK0s=P)l
zmhafEh{XeXrUpf1psx1}Sf(QMnBA%4^<Xy6eUf@d{L`S@8uzEKf+KFh`uKEbD5S1b
z9(?8U1xQ&bgk?bf7CVkKv264uBGPG`x*+w&wu^=*#)i|0iy714na~m?pIV_oeK9)G
zq5k+TkT3!F&}S7t!!K)FS@5z9I?;n4S-3qAO8}PfHBm*6Kpt-hShZuUECnSEPQQu%
zAh4{*Bp4luvRs}5_PY+<C)jodJ|dFDL*H{*_s_^?_!;<tr0*s0kbE|vu|L7}*O}Wv
zz1>~u4Z3kSre3*l{!~lMQ#imwd~DiO=u5C&KkECcb|%-)7Y9tnq2Osk%kgn2(~k(J
zjLJu6-HJ=STz^Vz#xLA{LoB*b08?A$+<OK3RTxN`9M}}q?cLjbt=%TdiMK-s;2L|c
zS?z*D^ze8PbhnJXvV0GSyJPoCZi@a-nB+J5@h48g#>~X@H&XI1xacqZ0|0t1_xRt{
zcIYS$ns)-xoAzlTGHC5+6n02G@)Kcb9`dy$deTLc@`FN-s1LV?=E6w4v)uT@be$y?
z_O}Z+wh41?sj>(~knGymiNEks`k)xHaYU(qnoiGC-w#hB#AMdbCt~GIH)>2)5Ahw_
zX{=K^$vG0Q2ue^_)|!$=K9D)-Zbudz;&-6Qtf(c2<;7qV*0}9<TJZY}#kwqOHj&I&
zMb+?kZ(|E2?rML0%dDj?WCOT-fX*5@6Pq4kg>JfcQcIs@;y7CrS5U*xb%_+6yaDOc
z1xJNP1s8>~Vy$IM<GwmY*h0&x{Z`wxJ+$40Glb(`AC<=!>LTinh-fI7-b~|yvX62>
zS8pQ8v(LKAx;-8*UMUurw|0lMsWwErhGn8k&1{%fX7Ej-lUc!xl4?uNAE5{4!o(xt
zJiL6Y%_c5){5wJY(WJIvmqT7q{G|i|A@9Pn<|ml?iK|zP8fD`D%`W~`i1UP2F|&gH
z-o(G4z-LtN4;bzl!xI798`_zDgxJ{qfwVo5-s@XEVRV9G$_lEQv_fW(KM+M#u${gV
z*v`zDUeU%%-}-NEF?}mDOBcYqzZn6~oPgh~)PHbU>YLaDn3?_-5fZX-25153K^)8g
zdM0L80Emf$9l*)N0nqu)BMH&BG&2;mHn9W)fWIk%_J-gmyzz<Uck_AD&+TX7-vnX(
zj}l-r6VrdOC_%thssN5Z#Ge}ye>I+0{RHlTnE<~LK*fI`mCyL)v;Y6QiGFKPaxj4W
z#x9lZ9Kg@!`%M+nw+H`5LjQN>dWJNGP4(^mGKP%)pKTC`^Y?iFz9Rt8zY3^Ct)FV9
z%&biSjA~}qg4XtCe-VDmshJr;Ozod+3j815%pi6K4t5Zb^VvbznAsSZn1L*8oc~Xc
z|0ei9Df|EFV+B4-v2wC8fH*-cKsEpy3y=ZC%*GDl1hBFJ8Gx)DtWT5wCzbQ5{*052
zgZ<f1|Ddz6urL5wm^hzS#|+|R;9z25dK%-21<1<4$;u972K|%H$-=?#%)!C&G{)b8
zY(S<bjX-t|&_7280$A8N7=TRd%q%PbRuD78lM_EriUkN_;9zC_ZO?y=!OF(Mz{&D^
zEl-_)ss8QdPhY|(+`{a+5)J@*4zT}pM9{M#{@oMh{sS`yJp=21gy8po1Le=4ITQ0A
z&h&q&xl@Tzu<!$71}xuWZG(rH#<YtaF?WTr25@1_uGlWo4az!zv0dd4w-lO0<Z@T*
zsj2q$u-NjE;c$rWvb`N8oHXPygO_dmM6}OTKfe&T`@srNBGLA-XWL(j^%5JC5H&7)
zx)3j5@+I6+6}0F%q1EdT#Y97VkyJm-`Lwz9$#Un1jUU9Mt+}Vv7Nq>A`1B=5f)xie
zK2yqt6XVU(k$P1huVgj1ZfV{Sx6HvV8GS#=zOi*kcI&D@))KSWU*c+RcVtbi!biXi
zAi3iSl7R@~h_fCIuXJ`<wKJBv#t$lUsZg|XlaA(R2bEN9o0v_04PcR&846=Y;&x&`
zM1H)at>4rwY%bk?|8aRF#1qPc+WPvx38#ORFFb`)CU%a$!|7i&1^<c_>WX0F--iFQ
znBdRg^M5KNP%?7`1DO5@P-*}z5P%85^hf+tw6TEzKz|ogFiKh*+W<gpfBDGYMU#el
zde@H`AwFR~P$EE3#gVa|p3w>d#(}he0ii(R00p2u6N*d~3h}}e-GV<_zUd{|2WZ62
zU4Q{m7p53``_R`D<ac=|Nj^|7!1>5I|G9K~h`t@f+3smoARr4TGZF=bsGJzme*x3E
BqGtdA

literal 0
HcmV?d00001

diff --git a/paper/src/chapters/mdp_human.pdf b/paper/src/chapters/mdp_human.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..7cef37a21dc48a0a6c66fa87a788419045bb8fcd
GIT binary patch
literal 12194
zcmb_?1yEeu(ryw6?iQTE9cBiH;O+!>*8qdNyE_RIToXJ%kl^mF!GpU7w+A_L&#imw
zfB&m`GgEu-)vK5F>ebV`zK>EtM2r!{#DPHh?V#)yffWD**ciS?c>5N>EMo#Ob2JCA
zJ&6<%00029n1z+2iT%^v%D~Y?#Kg$P*aU%(55d9F-o(Hf!8I*iV<4KM7QJysZSw0a
zkdiN6@|3O>2xx_!_rQcUSj0c*&-l9W!TP?LJU-@Q!FN*+oM5)01+oYkF)=aI>#3hU
zsKk$B@iPNwr$=2W2h2X&E$&Cts4WcM_t-Ty$B!+{%igu$hf4VwmP&mzy~awRHH3#h
zbyo7pOaF2!7FEq$RaMe8ve%L{IOl_`nnW{OqG*jZ%0$$0;qmwMzabD|&{fK;grWX~
z=vMZl2a%Y#JUu2d>H=FAa##8AjpTMB<b2O1qK9?MU&3XCu%O~g_(Vl8274nHR!g&o
zZu8Aa8*d$Ui;s3QZxjF41nO4E?8DjpkE99tM=t(Ad-u0J6KB_L^rhD#xfu^m7umc{
zTp`9){OKgdZTvo$ho3f3kIK{(O$KIdY`?o@cUUfw-G}?Y`L{4ZXd{S@Yn$<m>bhq(
zkFO<KQyt$e#?u7{ZMBD6MBw`zrPia8+~XA2Ufv|TomEiWblFAhC6r2&+!kp`VSkJn
z@SdBwye8_|_r5%C7{Kscyj@HuIkm_g;q~bo_WIh&I|gNy4dtwf5F?w_U%*uQ3W+<c
z)$qsrv<}2WUI;1U1O|{4<HXC4>#)2|n^CtmO-k>j&#7t-(;Bu{B)k!72l#UKjjLOD
z!x(`w2fPa6w8S-9vp{wlA74iP{S9m!LS*-Z4}mUnQOk-M@WocxEP?NalSvyMAsMx+
z>)RJOiUuc;j^JrDtV7yO7|}`CJ}uuh3nuX^L`7dYZM9Gls3cimJZ6R$%epUX0T#uI
zLpiI+zPD~JuVO1;QsEo65dw}2_R8R}9>RHiC{A$d6?Mc{Dkz|JOATbp@Wm;Ed{(&n
zLh(|Bfd+xnhycz^+ofZmD(>1z^uSd7o$AVDb0=NfPt{$p?s!i!$Qi`ou<R9-v{gqC
z@Jg(RE{I<cB#wIr*V?z^<C~O5%>UAE4j>^7#%F8A!y<s=>EPo%1i_U=tgf~`K(|+A
zgH`n91T*W){NjR&v3o`n@hV@TT>!durO%JC{G;Du`Q<49UrQ^SR`2@JaK___qUaBV
zRG!N1ptj2sj%W0wn~d7L5F;nVCn_siAz9JSappfjRM0|0){q>t*^*URz*1-x=9rX|
zmir{_t@4sOKk%TVN1r1|yjZ(O01FkT11u{kom<u1NqCv0UH0+Cs2|x4MyNoKrDmCp
z1&mC}?e@p|k><{UgoVs&JHgirK@H&wZ^Y~FQ*<dvyjM_HCjoQ5cN1&n9B>$JdA=|X
zxH7?`7z&P~;R`x4C?FPCS8|n<UnHlT{=PeC3K1qqy8(e-<lT`*pDfW!duc!`CS`<K
zn6p4WQZ70}l7&R{MG01PpOwu*>@Y%Ipzx^v%EHKZ%rx}2%_<XeDyr5MHsfF@<IRFl
zjCE;o(^|m10KN(!N8niG&O|pZECmEJh=$M9X_D$H6+f?T7}+Duq=xwUwZXiax*g-O
z$M;uN1W3u&1RY?GB~@3qXpWU}WNoUVA}DQqEj~r}{tprc*AIjXScM}VO*@*3a`+7%
zA)+aYFG^pv53C4^N|B05iPQ{n_auEgmJAxjD~Mz){#f6M8}n?rSQ&y}l(8Uad4@Ae
z>p_=aI*uI+&Y6#=xGo+X<kQ;r^{npPr-w*3O6O`W<MTL={4M;2g76%qkxkSIQZM<2
z0nAQQoZ^-`EZ^8d-jAid<QwSAicCZCPeHvFDx5|(K~YmFNRG5nF9o`?KtrFwzd;$x
zapON2HlM~1rjx?cz*0uDaqS=Rtr>Op=oGCWkgP!p<71F07UP!aAP9uNj1)2w%8q$S
z+`lOjwj^6Wwx+Eg*+c(i9#`3ScfTlG3xlNY5b7X+RjW5fZ(SCj&*$w+aepV6I`wGE
z8?E&*8A;RB`ke{49bu1^wL>Z?<_@SzNc5o0Yfp0wq7-p*Px-dK{!c%5%f9k=AIJN`
z5*hmNy}A}jA%1c50*KS^T)^K;w0gjvjMymjvh&RwXqcLuGL`m~0rM#nLDlFFI5wKm
z=1U_Dm_x7oN`gBr^&#Vd)qu<ow;rsyzXVm8sqQd}Fhoc_aR68r*~0_TS`>=7d)}RK
zJ8T7F#?kGs(it&g(WMFMX2I5hBSnVcmA=sKg8qXywO*gmihi-0y=x;^YUfEKK`*BW
zrY_m9S*PjC`!e<%>*nD}Po?;@Ks(lNPABos?L;=>WB`)T#Y$^mO`y+Fyq6RRgeO$|
zhZKwI^w(^a1y8f7pq0d3+9kap19h4*2Lh-SII`L2tXa;^6Nio=dC>f-MtFm=%B?nL
z;x8(9!T~hrNb<8z7XTe+cL`@CDEh%7QD!hgnlI>hvJZuD13jK5tb}63wh%iAjut=+
z_kms5c(4^T8;Z70>KE|gS7<ezZKE^|i3*;F2*s_UIIZN=W#P(-I<nu~cLuDPP5WI3
zU3TWhjp){-iKS=(JJm~;s;0%A&rU<>45BX*+FwO|4iIirm$Hl89cKdEiS-4g=%}mU
z)P=p~j=*@)<8BF`F#^{ZiMdMJNTc4&-@A=AXO%`#!9Xv+z+3DLdF^|u=`I#RA(2Nk
z^>NH);?RjiB*_d%e77`6Ww-d%VtegjTF+b{5>28ZP7~UX55(tf?=d8!hB~=LoNCB!
z(58@q<>+@DnE*7jkK=2)bUaeCl*JkDfqYk4Q&)T!QOY?<j41ZJ>0PEUq-1;=H-)#(
z6U~67pS;#V!}JdkcN;d_*;ZbnBxjSti~dMLtRK8R^JqqY+Em(-D4|nM6A!Q=E8<EI
z+6^k9vmvg=%1o*0ziDP3*#qKDdL3;x-y|jx_yj;3CXL&dC#_N4PORNJSfp-dj&gmw
zoRWoUOD*f-U>jcEUe&?Gl=9;G>{;14T_krGC{T7<x_-um=Pf8nKtR7w6zA$u-i066
zWES{>4tp0p1eA9@caG8h-ul=Wo<6UV=pu5KpZYD7g#Y>|UuA4?K;>n(D$g|Od4+YC
zR;B7KWNzMXU@0(hWaGe<=UAhI@>UVOs#fH6kQ&RKiVXgi$Wmalp`sr@>}osPfrv9>
z>nF4WeeW#b$D!*n`g&8k8s1dZ{-tm1%@I1XeyAyJZjwPn<T8}!l}-B{=u`0>UMSPt
zkY(EBpUPT6zCkkdZqnlDZ%2|VmA`$uyio2MfpQ7Y=hXswasM=nG#>8imM$0Z1#yyL
z=GVo*sY~=v1idSq5kAygueLxc<|F-@OGV%Q<xR3gPV>k8YvqgHIsB4DpBl0O?!xyZ
z4#}lC>cZ@mU`Ae@gH12LrMwbfU74Fy!~?at1_kW~3n6jFQ^}WQ)mXl8)WK+#y~e}(
z925<}$yDR=aZTb^vlSctN+2P@4+6k$F3nGz*M{#8T0{0DD-Ff@PNy|<AJ&TB#i$#c
z8RPq6(7iU(;~5NTaMR05R&0g}K6Kuvr>HJO<t=W);XlS3)6r_sW_i^7+6};Gs%BQ!
zae%>Lf%7&`8D~wfqtrSL<9@RsIqYyJfrHWj7;`6jX~-yIlwF|QH*}|u01E~JvI5ME
zFR6MXH1yEAK#G2G1$uUR%|u;cbLo0Rq^?D$4_RF++@qv`(k$gyiF#S3U5g%3Mo3p0
z^wZ2U;()iWSK#h8wPw@K7+t0=Kx@9Qf7&k*6TAq-scEYTKDEr?kcG#A)-v`6wMn5_
zM7?%7TPow!_$p4FIVSVq@^E{0>cf^3;*4MdG5#~>eR_J%mY-AP=lfHV4Cdfsc~*EX
zvizM)|4yGpT^+@h9G_BWz}vUaf~OC&0r05+U=|hxfB?)!22a<gg!-xCk1Mm74aD&u
z+NO*^Mj(LWuY~+b>FM|9V4Qys24Gfpvo!%Q%NxE|a<u+E^q;9b*IzA}<p7}H6=D|l
z4vqlUKcD~S^qxftxDvh@zVYe>4ChOo|Fpt$?|)hW#QxW`K%jrj>(6O`z<<x<zr9jC
z+vN{U5P*yQsjH_&{{4=z=mzhKHS^)H$*HKQxTtowY-v(x7Xtq!&YZCt4KV!*+7wiY
z1U<kBMwSqg&<8LGwS#9-Zpol3Ux<rAv$zToqEVO{w~A4LYm9g(j0iJ~c*3F@W2yH*
zERj9$M0V1PaQss^QVu>Hc^r9oP3fpyE){WaWjx+(rO^w)lVbc3@r8cFtXFrMdx^?S
z+Sw9}X8N$|Wa*FW#@{U3o27>?L_n{~r17v((6R@hs7E@Qt2frL77e`HPjY?0AceP4
z`N+(0YaDP3j$ZC;M*lENDi7k7mC%RNMJ>~AHof<=Vdyxy@orqIw(439GT9^UGJ{>}
z+LIi3Q>NF)*Be7hL9^y$yK&jCR#v*v(l((jR3{EEOjk`+<IfX3o*Jt6t-=Q5L6*)o
zA4%GtyUZDx{B)|USf@Umd|d*OO9-1`M+Z5qDbrGxYQEOzk(W={M{L>2nOZAU6O~l{
zGgla?!>c&PY9@o0(X7CWyhAyN#BzQogA+TIA9;2OmSv|6wE@eP{E4w?Z!PyR@|bzy
z!htOg77{tvV!GxfhhFdEvLg=y0^#dne{H9Vw0bg!``8Pa8i)jswb@*Dz`+Xl6^ctp
zVaNQTrM&s2mb3-!ki6{AMJ9P&$H3KIhmcciRyImrpR3SDg@<EDLYDl^m_oOAsOz3h
zUIR63#V)l(6VXvTie5KrizJGskDEOi@ozeHGcNm*s<$Vcoai&=gqar}=8L9S3UoP7
z)yxWKAWT}0jO1(N+JoLr<!4N*WTdCQsfPw89I=67mRV!#pOsYOxRnOKN7#uifCDW8
z*HKTUQ)o|1%e2y)jC!1Na&*#lnsn?r=nfeLC`IGx3L!GqvL+T5b3{K=?taQ8&g@AH
zp$ck;)OlO!7CYN^Mi6j6{5l{Y&?U5sSFJu+{$_Tx#M^M8rb5K5B_MwBYZM)^KRL39
zzoqKeD)Yl}Ur%OJls!4$H;Rwu17n*PNqso|7*tz29i#PFdf_<TZL6zw(JaT*-8$5!
zt5g#z%%Q{FYE_X16s=n1A1e%FIPG~)ONY&E3Q>~M%FJzMrd%9GSY2sTjZBL@AbrK1
z6Eo3A=lV+GTw>Ag>Vg3xypT6B-R~S_&m7N}($W%m-4x}9*B{+ZFG#esEGTQ~8Y+N1
zX5)GMs9%cwe@tCPg1lx(2TRFx6~9oqQIcs9XqO1=x)%JvK1(r%jSAJqm57#aW~-r}
zb34}&NsL5X!aj)oh61h3gkUlibc&8mvQ6IH>?}+(HI%~HW^IqSQ{YHSZD1rMEF|r(
zT$Gpdl+?%atOIepjdcl?GYNTb5Yo$uNcf!QC`=10%xnfOe+sdsXQ!7o>wG_dIAWM>
zFu9C5TrnoT35lSHCu2)NUvLuP>||5-L|%s-*7-OK5OlniBs-sHfhFb3*|tBzS9Ede
z!@mhSd^xNipIQxqFN>xBh2YTTMHx47LTtZGj@d^dcbe0gDC?A0b>`mk*1hfm)~~yF
zrhIvnuP|feAsVc9-89dBn1Dy7eQR!X|3x*m@O=E|+_#A>U9SUd!xboNh&$HC5GB7N
zHOzhZhm8;nAZ6btQthO%G@R4=!L%<-1kU;HBJo)gYYen;S-$2g)UG|0fs5!S?XeQ3
zz_*<789Yc|{hS2`U6yut6TfCBk;`>QA4EHQ<l^V>haX?&gm1!+y&l6JGs`J`cgW-%
zj5muH%ot6i(idkx$hAbcjJ*?n9ex>J&P(@Ev7>k~nlpJ7a$2qf7+mO9W@mgXC-yj*
zwTt4y(E9qbk34nM$2gk&^b{qD*PBpkdRqJX{9r&uMP?3>XE=TA&K1Yv2R?s5y|iAb
zW_|W@vr3(=smO+c3Mn#9vmi$?>fNCbderVjdNV!bT|LjGSz91BJkpJ;W2djM#}V$x
z0hx@+njchD6fDZ&qtvZG)k2*hW;Ju+!`G^_?x^8mekQR93)Z50#fj=mB#)gV`S#0O
z$>v?^kVa&EUVXReF~Fg}dJf~jDm4>-W=g1#w%6tR7ThN8O8b!#+&qsRo&D{%Mp=a<
zs#c0uE~VeG9?JWd>B^mtyl;L4z^1;xxY`NCG*gJ><7S{%YpA!JNYe8>nOZxCy7Iex
zM@-_TDvK@>l-sGh1xPbXQYmp@{Y2^4M`qti8_0;R1rZ_ys5Pvkq~A8=&TS0zh9wb6
z+}H2Yr1UDA8pmp+BrH~nU-UN<wn!-#f$2V%^kaWliP=^0f_Sp!%A<?}I+B(|75x3O
zpsnnn$9T9Oq-9!fRkX(jtv&eb?lVyQ(W!SUajNj<I8d>4%$tl(v8UJ<3u{+XIczzQ
zKNaQ-4KRAvJA8pSXmiKaema`F`v^I!{yag-BC&<?Bdo@`_j~C{(hs-ol{Vp<OOonG
zQKCoNi2k{uFV)Gz*$x(8r;D2M?nNwhWKhR@wh`PFiYG7L-&RW*rRyCv$SspD(ga9j
z8)V~F9S5o#cXZZ%w-;X$t}I{H16F=EH*UNCkQ*K@7R8X7zq}ma{5^<U!nlb$lyS`!
zrQa2TbIh=DmH$ddRwMx`H$bC#ZftDcAojKtrGtrOD=~StTo!zL-a%V87~ec-8X!N^
z=@}y51%MwX1c@6uDJ4g71=9}RXdB7V@UO3&PB=Q}e@UELDjaZDa4cAUxZ5eq_+I3S
zlqvu>A8rrs6x5t#Es#kR(WH`GWuVjOeg8>vQ&n2p4_!TjvqYZ*F;c17$%z4h7pF_K
zKJKyH5PJKK_B7>|!YK<ES^J}v0+MC`dnh7T)fD&=omXfj#_N!rrfMSgM_6g5uczs5
zVeYnL|7m0h9$A#WnB*mBf<lRwHXkK!HyL4L!nL>8XlV?W)1zU1^MGwIV}Mn%!8_r?
zx8)KssyxSH;HPT2a;ewk5r-AWCZ?ou5dKMnjOy+pgCJ2Y-IxWGCiOvlm{A&-rhT4V
zYL+`S>gpO82CVP9$08_HOV?_M)9^TN%z43IZ|3ld0IKw5-5z>huvFjdD&0m`Q|n22
z!nSq*x}N&9`(1K(4a4X8>J16(Le<5L+WoJ3cimayr)Mw$SZfb4*HpKZyKw>>MU+c8
z?~beMcysh^;tu?2W;I3LDQHJd@ald%xHw^N=vm9y_=?yH;7$I)?fqed5=zMo;*8Pn
zx2?L56XxP7BL_>0UQRnG3YcNu(@-v4SUO~|bw|80l2UuKyNDe_W2b}|{N+TEA2;c$
zF!v&o7u~x2VVWY;d5qkZ*MIr&G7k~&0%?TC!>_QFXNXVUwYOPB-B54!%dcs;`M0tc
zRsJ#9vhjU`8l3bt<m^jKG^R;?YgpKlAbOmx-ReH}THdh@2M=E1PSSBShwqFX8I)$+
z;B;XfbeCXiYP&#_K&QY`fnha-1x$>>@Su-Emf6<frxogCACgHbM;rznhLz~L5Tt{g
z!b%sKMRd2-x3%*U8|dDC?L1#~w`i)D(>+o@(k`5=ccJ6anO~geMGlj_pt{CBNpl=t
z*i4BqAI!QB&gbsQ51*`HU?Zh7TR7zPPG!nJ2-z+)`-U>-x_wa^Fqhxd<1}%Rx^p%)
zHP@4TqdJyU``*WL)``M?N!$w8&9O9f)#(_gIN}GxO^K89I77F3HL|HLI`<q-T*pVk
zY6Z1IgS<K@XHlmHT;IkH81DekUFY%$F8_S;Y-%r3)}&3=WVw9Ane%o=;*!DuoT6*L
zr+~EF`;iTzQoj3DyYH>OGz5p~+D_T;yXI$-{dea_blo@G<#(KfW2TaYGuo~a1^JYz
z$tY1#qH2>9pkx$n8-EyPnAAWEmxMgT68^+*&mj38a|KEbF-j4vqD-W4ty+*=I}n=X
zbpj@sP%D9>Zt__&x_O+=S6`}{-P?SJwKZn}n%|LK_zOGje+Y#7sn3Ze2VXdxwE1aE
zYg20rb`y3hWV0|a4+3um4}rIq{1HEgWsblel&YAj7y}y(8xs;;NGLC>7y==5?|16=
z>UZe(=y##}nX{L3o3oX3nR8_5kIcvq=4VdX#Df<@87jCIV>tc}>+j)9HQ{QmuG4k6
zbaJJHh+@6e{F9@6n76hre6hw^cIdA37RBn}G@X*jsJT=_?dEvWt)r4`xqQ~;ZT$6A
zJX@8Syf=`A-t00Qia)H=s~PX>S9C7!gh9oSaLJ9ACQ(%jTErD*C8QfW8x@np1o$#I
z35qNf@%?AsORMpr33&MB55TLQdOBj}u^?uby%Yn%GH?!mXi6-lwiOK}?|$!@>uPGg
zp{X}@2sppF{$ehd#LFS4{($H}`e6QBi;q>l<LPnsgqhYG5&3j?)4gf4Qu_J=iHl;N
zrt^)g$GI4v`&)BjS+|0dUk;_rYWs_qzlLSy(dI%xaf_k2wQ=m<1TgTjyWUr&YPu#M
zqprQiEb6yM;?Gb74~Q_lD#T=A#r&j#C4<WuX3oLle7)OXZr;h;zQlv(Tvy&S#!23#
z(MKRk0~e1HWbrwZ)(RnYGa2X8bc&2WAT-R1Yzt>m5x>uvrCtW4)L$J%R)5NBpLxzV
zFPzs|6R12l{dKPn-@8da*53&&mCv!~M-)^(uckF)lbt_GzOa_LaFl9<iq!c5>t^om
z?ozvVv#BY$UWABBx@UStIHeO3PKg1gNS!vV?!tMJv-)hx5$!%jhbX0$-5Xhg-p|`#
z6KZYJ#Dc#RjpK>NtQl3iSrOc%2j#^deMXlel+-sNxR(IX_BjcD8T&Rsg2-oLLrfTo
zmazPZq*65A7^7wrWx6=W+zcxn;dNZY_ec?LZUyc~D<e%yeA7wRjGAtA^yY*au0^K7
zM#mN0eHW|8vPF-}!=-O6$oUh9+cqr+qlo5MNhvke_Xi_<59_EcuJ!oR<_B+hdFSR4
z`>Q*5-y4<Z`8GwL!b$`xe|AGREDV231BaK#M4S5#9pw`PvekQ|b|%VhzRbxqO4F2h
z@;1Q>V4wWuCoy+$)h_`r`sNuswWwvUv2xhTkcnBC-j7urrnRP3pJO|Y$81|TVsbAb
z@LBG^GLZc|*~>4x$1mcO9i;Eapj=#F%=4X(hlgKiZ+oqu@#hylHiIuXh>%@4;G@+n
z8*Xah=8%0*(A!&ry;pLGcF8w!y7DC$bJp8CT&n`tat>cI0A(U1fnzvAuiNq>U1)KN
zMV*1eC#uCJAIaXK5vmd!)u&SqnYQI~niO|2WEZ~(5PfOpkXwiw4X$x1`AN6bvP}>5
zvato$i+Xd+s}S{bk~>8UI9V)@REn608k;7_F_=}yO~PH7Hdi`7zaQUmg4-ly{byJh
zLL|0d4?I;|j{=j1Ym%Wd9s-T~hqeK#asXoGcA~avW>+pvnIkyRG;~L?hzk?SEj9iS
zU$%P`eh@mjRc?jq7*i%U2xRET+70gK?&s2^wUwn*+Qu>n=Tu33p8-iRO{bD~ka3Vx
zD)>%J(N&#H8*EsUOurc++a>gsQ!CYysV$bjM`d3_#E`W+Q4o({&CqiY3)66w?8~5b
zHiLcFTgyIo&4Ec_Uaf(t({Babg2<EMdwD9G-#$7`JJmQ{;MaG*Thq>+taA>X&vzbi
zUx$vPvtu8%AeBQx2(naez9#gtPoK7v*c*GO*7L{>_P$Z%a5CI)>})JQ3uvA(I~K)>
zoU!|gdHog6%^U61vv-xunO>tK@p_;>e;Pkz;Qa|kn7dmWRo|qAoRD*i^r5@cWt7}r
zH&f*MX*JU#lOoF^t?~<{sPzP=1{$0134$?&cJc*x{ky32HVU6Eq%%e5`%Bd4d7_)g
zs&}r5kcal^=&&R2_s7NOu$iO3S@e9Ih%Vqi;<Po48RnV?F|pw}Y2s^?6sW`LL8vzl
zTPxS+CQm!_57pfjq%K|@IcmAaouy2~S$scKg?#sUD9~aSnGlIEcly=QLAsriVU=)D
zxo>N%4lWV(%h7+=dp+{reUxn+@kTj!1rZrqKbWiWcBO3*Sr%50*3;t05v@4~WMLXI
zF3Eb}mk?7@`SyG^f2KgU-f^d2tGxM|U~_X2Z?3ew%*ah$nk0XIyKjBnroXs)8UNc@
z!slhkB=gI2dKFkqp9;c6lVMGpdZN1xvfxF>q;(2UIx{=!6FJm!%g+f4NrHl(4MVH~
zHFbJAUmyct!d1LviY0`D_V-f&nB;)R*v71zT#{NxO?^M4Ir{nq7j{@C$TK~rPm4Ek
zK$IFTQZGR7tH*C9Po>K($`80~^+z(GU{kithO<7FmnntRsPGh(&X`hD`)D;s7JQ}s
z{HuIPEWb~HH_-Ewh-%Rfs^WnWAs^AnJH7JI3iYd)HzJ3R5`tL8Q;1~AI`_N0=c7w!
z{0HSBxLzai)_cp;&GNE{b|@UN>tby)=DAK4;gdfM+nLOAmeb0C=(SK992_Qqm3xx;
zPBV5x8D=6bh4RHP2n|^i^-X=lgNEY-%4ojJYR0=p#ukF5Xtl9#_XbBf&4<+QmNXBc
zA|3eOP58=d9x-oiuwB@zaoJ?E8OKrL$k+oF!6&DNmBN)Qf{8dpO~T^u9-~9zS}fA#
z$bY=q+J%1<5F}#7;R!9}FVFn3+leRTIr<si|0d7%TRH@m$8u}GanbnAKD=`a?-*Zi
zhOvCjEa(NSgzsi7VgKH@TApUI?*m<!T|BfEaU|U84;KqRdA9C(&n+@9<0v1Ersz(B
z?nC77KASvT+_^2l_Egsgz~&ZumEI?0;2m1rOpP$>yrX7Wyh&{gRQJT65dkbKCA|1m
zN;T>r5G@pCf!<{c6ZipcB-CAuK>h*Zv^aFPiawx@sI17UDlo}X)<B<iNzfm0<Jt%~
zRU`rCbDNPH@d)it)Xry^R6^dA23anIXEZHTu;1uljI=tXz-_8BC<XQylNrTCSy6a7
zqUcD>Oi*w0ZGzA2fV{~BghZaSiQ_YckMf~}*;N)uJ$tu3*o4fuxQ_!Qi#mJ-BV(rS
zxbtxj&Uo`$ze;`1<aDI`BdtWI6&1RTOO9X7CR`mYoM&G9k{V!?Q3|`hy~N-#IHAGh
z02%zk2o2eOEOfw#<{V}5dW<@Hc-$4Hti+$Gh-wTt@Os;jsT`+bOwZ-Nm6?Dn_PAG)
z#j$@KJF(R)KW7dzZ1gMm;Y-zu<yG2S!dt>EwUu-9%3j<fzstxA!p5G-EH&&kjjwh+
z=;JU(>NragK2Z%zq-(ug_CowyOXRYgHthAm));(VF!cg_dGZNHzn~Q5ofD?Mw@G4r
z7_}*`U|i(`&dHwYtPQ#AVPC!kpP?%Tx$CwTl0;liAaN5IDhNs|2nT2QTFf@2D1QZ6
z8RTN)-*h?YCnH}yczgAE47#KsvCGJ>c>Ho{3>o1U?9Oh)rrBY=1Ao7>4Iq5Eh>1}*
z(}y&HM4`5%EWf$B4bz)Daw16~wrjw|mkoF8oIvJ|yzH;fJK#`~At3H=PoKcvRHEWl
zL2)(RkzLZ;Qis%EpifT=`8EwRe{&NqU6JEi_kmy%!9s}s)5n_~J^VXXdT>=8#q7;R
zV*5j(To|_DavH|lsqzYf+d?c;yhnMFo!+S~a&Da2o(FPxI2g!qdP@B~XN}F<T<e+m
zyj$6U6Gh-AK`U8jwYIbnb))aveEu!0*rBICLmBQZ%$Jo{u9JEb0;a7J;NT_!IvH7G
zm|M&JWdXVsFL1zFeCeRvDL1jlF^?bO+Es`#60vw?m@(T0Th*sq_hUSxW4D`H|Hs(C
z3Lk3k+O<o`dt#$OX}Q%mT$y>X9eE-B)qxoX6q__>c*a63cPOmR>>eot>%5-n78Mqs
zTH(k8sm92z^FEA|42o{D;Pj1~??mo}G29Xvr_=TdAc14Uw*uK5Vs_IOyR8%-j|*Uu
zuw8SFfR|!;joFn%0YA4?>wdW>Hz>oDhh#TlF->>8W=Z4ds8`cosHbUM^{=71i-Iv@
zbJYmQd~YijFj0DGSLl&-&c~VG*5`f|2E+VgrH7hfAx6ix{S|+ByIxXTM4v4K0y81w
zn(pX|Io@4GU-vx_Il3gNj@)7tjEORE?8(k*BfXi*cGczpz6_vnl?xVzwO+_(<qSPT
znOn)~CvGJ`ty^iOs_BD~rMtDlXZ7Y!vYga^<O`b5gt@)YtoK6Lx6Cb~tMtf>t7}r7
zofiw^PWKY?H=FwrjD4=%;Wm4gmomTR%S+clFyb5&tiv<n(7-xdpPM-EM(hkRbg`0O
zb53kW`LGymZ!I5Stwpp@V>~-OJyUk^Rg0(sPp_#SsW}r--ectpmFqatr~dvrS$r#P
zDhC40l`waLOA)MH>*FgJp3Ayd1p5jz(BDwhnFWWw1Jov<_NGx*r(YW!0Mr9GN+pA*
z@IB=NROm|==#W)44f6ZTK8N&tjM*YC4&GQZ(D)XlMb%L$YxV7QkxC7Mp2ifEH2sK!
zG}~uw#eS8Hkbc?y_;6N;BrU_cAK|PtJ(3%|3aT&>ixm<9LHM0G#ttB49OI5${KWUi
ziz&wvyn{0<KZR(<R!EUI(?gETfrUt>4J&OlSp521;d?({@?gm-Q@7olCclH3P=bcI
zW6DIddjT9SX@OJlO1NR4;$H+MV3$?kQ_08I_aVrnnr~9|0pDTQhnXsfDx5o|b;z0f
zpIG5=c;iG8?5%bW1%5T6fRrG17wzN1i@o8jAInz&_P1ek8YDuAX&MB=TzBYbH^X$C
zVL|E;PjfJm3I!aKdqpWjyAN1o0v8BLlC<Esen6OdV7U&goGUv`8c01bKT&?P(~w2O
z+g&ANF`%IXO}-XZVaF65VF-vMt=H}r!S(k??h67Ex8vu<1hW>$NYe+keUXD46H0W=
zE2hU5Rnoo+^ygzzMriX5K)eCW4#OOqyo7~j|JsK^QEa71hg2!Ns((zqZvVg)>nktY
z`#KXw*Yk@SR16X%DZ)W5K>8hL5~4O;G0jS++wj>+>xPp#U`;Rs+BtD~37Va-%0j;j
zR^3ZqMI!>nv=LaGTO`=drN|p<0!SuITX~fL<;v85a^CvWOB+kw6Zg~WD+RGv+_0fA
z)lna<mt;d8_-{!cYhPb+@Z;({aDZc6(Ebw!`;Bh>iG#6o0>OVHVgG^y|H8Kbpl80_
z|BhUz6F&k803vlCxrGxp2*2*Ku9Ska*kvNT5Z}9a5u1!+0gbioSDQ(smR>EqM-t6{
ztCY5{%s-Q@=XgOZ?$Ryl;&qI^b%IZ{i^J@t&>(cX#8_MY>nPY%5ie|HuW>~o1fp#6
zr3#fhEvasID1k)FQ$cfYpV2+it8RN}W{2U+nNzB_ZEMB0w#tW#*>?>|rta1-lNuN{
zmhxNol@Q4bwUc_e&k?_xFr^{CIOU@{W&;t6z$5fJ&N}(PD<3o4gzEHLNDkdlwA5MJ
zs}o&UdCzSOrL4Pmr{ae6Y#%KL1y5m3nfFsfE#q+l{k{d6BO3N%Dk+%wh0!YL-o3be
zney6VI`@~GF(gRqkd~OiqXIl|%77NkIwP@EF&#E-r~6j@Lce64aVv<CCDcqJBOle4
zB$nr8GbZ^1@-2s=jbf84)CJ6V!Yk$t=i5eNZ}H2f2Fu9m(_gdWWo`prt+yU&-C-&_
zmxyh*wH*ItSN~#0KB1E=?7tPB_ZajS^!tqB{sFr_W40nD4o3DCwvIOTe;}(*A~^%=
zCrnmMR6$5tj$X*Z@ef2()x_RF$;95mlu^;f+5qymwwQsng_RqC=5Iy7vnJrTD(xRy
zRt9Dc02c7yO@xGOTmf1DMi3_pfDz2X1^|IMIRIQ>PJqsD9Z5$6D+?n*h?$iM0Qg%X
z=wM{>gfBm-{4PFE`nmkv__sjVz*fS<!p!_%DoTzf)~W!`Kbk)mB>pNqulfnhH30*D
zBfg6NK&GGZ>StU1yNP~}pyXue_#3NMws$glHs5clkb#5AZ{+)bXRc=mTiD#d{x4(5
z82niVfw+G6_wPFb_`PR;96C|R6N%LVVg_JVvw#Rf94!74{BEaaVeDw`@MKfq|L|o2
zaWHXmfPh@j4#LjD&ID!wva)mi|9$+op8u!W{J+()0iX5RxY(IMTp(5;JAj=P$OL+F
zZx9!NjUC7YWaDIeTGv0NTu)qKE_P0iXTSf4oSl`G3CIfOdRjOOh>M96%nE+$<4Fa`
z#>B<O0b~LFQ_jW8$@Hwj$@$dB-woM;;HMFR9Gswk_6!8Da&R&M!5l2CtN=C;3)7Q-
zKTnDk2x8)7WBYCDfAzt}&dS8a`g<);m46NW`v5$hPa748#WQOe0D6wa|MXGNvmyT7
zN#*~8J^^~>Sp4BQa({Csp7|4CmOuRL|H6+}i4(L81fm5UJ%qc^svx9(E}j>?s6ddW
zeQoDK&l`xVosBDLxbtvJ^<7hH6HH=rx&*}P%*Au=j1Y90%;k!IdmjF3?A979^!+sa
z!OKwHTh%ov5FEbe@aMz{eSxMmyNu5ttf-l=Jcr#W1%<z(DqX1=iFt+Qq&SC1&1CE;
z6fwblv`N%2ND}Uf^wlFW#4rm~)UtSb;|t`&z$l^e`FgBhy68ESXv?KtiPsobV@7>#
zw1#n;aYp!qZ&?VnyPs`b)<@MI{~cDD-Tf+P0$Ybc#cDM7Y_%4<rS-F9iH?QJhpWSS
z^I>XOYttTr?2Wv-Q3>{u2%>e<u(AW}hf+0gN3*fZ^04koa~k&jE7m>8)_;k#e=%8}
zA}yGM^Y6dUzvw0Z3M1-@CZ@j)|0l16<Im&%U$)_yQ=(+yZUO-R5xdj?S|9)z0RAI1
zD%#jM0ziNBYM3P<rZxZ&`(HltH~GX!U;pYcGt4i-@1+P3WZwe>0>Q^n@DR0FT-ezB
z4@j#S&=_x*1cZ`^I;CD<><R+N5GBmt`@(sI(nbROQ{LHND9G4QLP5P4p@8f9&y_nk
Y8rVC!+CMGoDRQy0Ay86^%8McVAJsn-^Z)<=

literal 0
HcmV?d00001

diff --git a/paper/src/main.tex b/paper/src/main.tex
index 80699a2..fd9298e 100644
--- a/paper/src/main.tex
+++ b/paper/src/main.tex
@@ -1,39 +1,30 @@
 % -*- TeX-master: t -*-
-\documentclass[sigconf,nonacm,natbib=false]{acmart}
+\documentclass[12pt,letterpaper]{article}
 
-% Remove ACM copyright/conference info for thesis
-\settopmatter{printacmref=false}
-\renewcommand\footnotetextcopyrightpermission[1]{}
 \pagestyle{plain}
 
 \input{preamble}
 
 \begin{document}
 
-\title{Pricing Heuristics Against Non-human Transaction Orchestration Mechanisms}
+\title{Adversarially Distributionally Robust Optimization and Reinforcement Learning for Informed Dynamic Pricing under Strategic Demand Contamination}
 
-\author{Daniel Rösel}
-\email{daniel@alves.world}
-\affiliation{%
-  \institution{IE University}
-  \city{Madrid}
-  \country{Spain}
+\author{
+  Daniel Rösel\thanks{Primary author and student researcher. Email: daniel@alves.world} \\
+  IE University, Madrid, Spain \\[1em]
+  Alberto Martín Izquierdo\thanks{Thesis advisor. Email: amartini@faculty.ie.edu} \\
+  IE University, Madrid, Spain
 }
 
-\author{Alberto Martín Izquierdo}
-\email{amartini@faculty.ie.edu}
-\affiliation{%
-  \institution{IE University}
-  \city{Madrid}
-  \country{Spain}
-}
-
-\begin{abstract}
-The primary objective of this thesis is to develop and validate pricing heuristics that protect e-commerce platforms from systematic exploitation by Large Language Model (LLM) agents within dynamic pricing environments. As AI agents increasingly mediate consumer transactions, they enable users to circumvent the Cost of Information (the price premium accumulated through demand signal expression) by conducting reconnaissance in isolated sessions before executing purchases through clean sessions at base prices. This research will make an anticipatory contribution by adapting recommendation system methodologies to distinguish between genuine human browsing behaviour and agent-orchestrated information gathering, thereby enabling pricing systems to maintain margin integrity without degrading the user experience for legitimate customers or getting rid of leads generated by LLMs.
-\end{abstract}
+\date{\today}
 
 \maketitle
 
+\begin{abstract}
+The primary objective of this thesis is to develop and validate pricing heuristics that protect e-commerce platforms from systematic exploitation by Large Language Model (LLM) agents within dynamic pricing environments. As AI agents increasingly mediate consumer transactions, they enable users to circumvent the Cost of Information (the price premium accumulated through demand signal expression) by conducting reconnaissance in isolated sessions before executing purchases through clean sessions at base prices. This research will make an anticipatory contribution by adapting recommendation system methodologies to distinguish between genuine human browsing behavior and agent-orchestrated information gathering, thereby enabling pricing systems to maintain margin integrity without degrading the user experience for legitimate customers or getting rid of leads generated by LLMs.
+\end{abstract}
+
+
 \input{chapters/01-intro}
 \input{chapters/02-literature-review}
 \input{chapters/03-methodology}
@@ -42,11 +33,19 @@ The primary objective of this thesis is to develop and validate pricing heuristi
 \input{chapters/06-conclusion}
 
 
+\section*{Acknowledgments}
+Eugene Bykovets, PhD - ETH for helping with problem formulation.
+Research supported with Cloud TPUs from Google's TPU Research Cloud (TRC).
+
 \printbibliography
 
 \clearpage
-\onecolumn
 \appendix
+\section{Terminology}
+\begin{description}
+\item[Agent $A$] An actor of non-human nature, powered by an LLM.
+\item[Human $H$] An individual human with some job to be done.
+\end{description}
 \input{../build/concatenated_code}
 
 \end{document}
diff --git a/paper/src/preamble.tex b/paper/src/preamble.tex
index 79b2857..c24e7cf 100644
--- a/paper/src/preamble.tex
+++ b/paper/src/preamble.tex
@@ -1,6 +1,25 @@
-% acmart already includes: graphicx, hyperref, booktabs, amsmath, natbib
-% Only load packages not included in acmart
+% Math packages (load before fonts to avoid conflicts)
+\usepackage{amsmath}
+\usepackage{amsthm}
 
+% Define theorem environments
+\newtheorem{theorem}{Theorem}
+\newtheorem{definition}{Definition}
+\newtheorem{lemma}{Lemma}
+\newtheorem{corollary}{Corollary}
+
+% Font and spacing
+\usepackage{newtxtext,newtxmath}
+\usepackage{setspace}
+\doublespacing
+
+% Page geometry
+\usepackage[margin=1in]{geometry}
+
+% Essential packages
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{booktabs}
 \usepackage{csquotes}
 \usepackage{subcaption}
 \usepackage{siunitx}
@@ -8,6 +27,10 @@
 \usepackage{listings}
 \usepackage{xcolor}
 \usepackage[ruled,vlined]{algorithm2e}
+\usepackage{cleveref}
+
+% Configure cleveref for algorithm2e
+\crefname{algocf}{Algorithm}{Algorithms}
 
 \usetikzlibrary{positioning, shapes, arrows.meta, fit, backgrounds}
 \lstset{
diff --git a/sim/rl/environment.py b/sim/rl/environment.py
new file mode 100644
index 0000000..19f9ad4
--- /dev/null
+++ b/sim/rl/environment.py
@@ -0,0 +1,451 @@
+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
+from dataclasses import dataclass
+import pandas as pd
+from typing import Callable, Optional, Dict, Any, List
+
+# "learner"  agent learning to optimize pricing
+# "agent"  part of environment creating demand signals that learner processes
+
+@dataclass
+class BusinessLogicConstraints():
+    max_price_adjustment: float = 0.30
+    system_max_price: float = 500.0
+    system_min_price: float = 1.0
+    product_catelogue_size: int = 100
+    episode_length: int = 200
+    sessions_per_step: int = 250
+    agent_share: float = 0.25
+    agent_recon_multiplier: float = 6.0
+    agent_purchase_probability: float = 0.20
+    coi_strength: float = 0.25
+    coi_threshold: float = 4.0
+    coi_sigmoid_temp: float = 1.25
+    base_human_demand: float = 0.08
+    base_agent_demand: float = 0.05
+    human_price_elasticity: float = -1.2
+    agent_price_elasticity: float = -0.6
+    w_agent_loss: float = 1.0
+    w_volatility: float = 5.0
+    w_estimation_error: float = 0.25
+    seed: int = 7
+
+
+def _sigmoid(x: np.ndarray) -> np.ndarray:
+    return 1.0 / (1.0 + np.exp(-x))
+
+
+def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
+    # baseline heuristic: high velocity + low conversion
+    v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index))
+    cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index))
+    total = session_df.get("total_interactions", pd.Series(0, index=session_df.index))
+    return (total >= 12) & (v >= 0.20) & (cr <= 0.01)
+
+
+class CommercePlatform:
+    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
+                 constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
+                 use_defense: bool = False):
+        self.product_catelogue_size = product_catelogue_size
+        self.max_price = max_price
+        self.min_price = min_price
+        self.constraints = constraints
+        self.use_defense = use_defense
+        self.agent_detector = agent_detector
+        self.simulation_history: List[Dict[str, Any]] = []
+        self._rng = np.random.default_rng(constraints.seed)
+        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
+        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
+        self._last_interaction_df: pd.DataFrame = pd.DataFrame()
+
+    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
+        # ground truth purchase propensities
+        p = np.clip(prices, self.min_price, self.max_price)
+        pn = p / self.max_price
+        human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
+        agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
+        return {
+            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
+            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
+        }
+
+    def _session_markup_multiplier(self, signal_score: float) -> float:
+        # session-based COI markup based on demand signal expression
+        x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
+        return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])
+
+    def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
+        demand = self.setup_true_demand(base_prices)
+        human_pprob = demand["human_purchase_prob"]
+        agent_pprob = demand["agent_purchase_prob"]
+        events: List[Dict[str, Any]] = []
+        T = self.constraints.sessions_per_step
+        n_agent_sessions = int(round(T * self.constraints.agent_share))
+        n_human_sessions = T - n_agent_sessions
+
+        # human sessions: normal browse with possible purchase
+        for s in range(n_human_sessions):
+            session_id = f"h_{len(events)}_{s}"
+            k = int(self._rng.integers(1, 4))
+            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
+            t = 0.0
+            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
+            signal_score = 0.0
+            purchased_any = False
+
+            for i, pid in enumerate(prod_ids):
+                t += float(inter_times[i])
+                price_shown = float(base_prices[pid])
+                events.append({
+                    "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                    "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                signal_score += 1.0
+
+                if self._rng.random() < 0.35:
+                    t += float(inter_times[i + k])
+                    events.append({
+                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                        "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
+                        "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                    })
+                    signal_score += 2.0
+
+                if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
+                    t += float(inter_times[i + 2 * k])
+                    mult = self._session_markup_multiplier(signal_score)
+                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
+                    events.append({
+                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                        "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
+                        "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
+                    })
+                    purchased_any = True
+
+        # agent sessions: split recon/purchase to circumvent COI
+        n_agent_ids = max(1, n_agent_sessions // 2)
+        for a in range(n_agent_ids):
+            agent_id = f"a_{a}"
+            recon_session_id = f"{agent_id}_recon"
+            t = 0.0
+            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
+            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
+            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
+            recon_signal = 0.0
+
+            for i, pid in enumerate(prod_ids):
+                t += float(inter_times[i])
+                events.append({
+                    "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
+                    "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                recon_signal += 1.0
+
+            # clean purchase session with minimal interactions
+            if self._rng.random() < self.constraints.agent_purchase_probability:
+                purchase_session_id = f"{agent_id}_clean"
+                pid = int(self._rng.integers(0, self.product_catelogue_size))
+                t2 = 0.0
+                clean_signal = 0.0
+                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
+                events.append({
+                    "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
+                    "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                clean_signal += 1.0
+
+                if self._rng.random() < float(agent_pprob[pid]):
+                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
+                    obs_mult = self._session_markup_multiplier(clean_signal)
+                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
+                    oracle_mult = self._session_markup_multiplier(recon_signal)  # oracle links recon->purchase
+                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
+                    events.append({
+                        "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
+                        "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
+                        "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
+                    })
+
+        return pd.DataFrame(events)
+
+    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
+        if interaction_df.empty:
+            return {"mean_sale_price": 0.0, "look_to_book": 0.0}
+        purchases = interaction_df[interaction_df["action"] == "purchase"]
+        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
+        views = float((interaction_df["action"] == "view").sum())
+        buys = float((interaction_df["action"] == "purchase").sum())
+        return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
+
+    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
+        if df.empty:
+            return pd.DataFrame()
+        g = df.groupby("session_id", sort=False)
+        session_duration = g["t"].max() - g["t"].min()
+        total_interactions = g.size()
+        avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
+        interaction_velocity = total_interactions / (session_duration + 1e-6)
+        views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False)
+        cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False)
+        purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False)
+        conversion_rate = purchases / (views + 1e-6)
+        is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False)
+
+        return pd.DataFrame({
+            "session_duration_sec": session_duration.astype(float),
+            "avg_time_between_events": avg_time_between.astype(float),
+            "total_interactions": total_interactions.astype(int),
+            "interaction_velocity": interaction_velocity.astype(float),
+            "item_views": views.astype(int),
+            "cart_adds": cart_adds.astype(int),
+            "purchases": purchases.astype(int),
+            "conversion_rate": conversion_rate.astype(float),
+            "is_agent": is_agent.astype(bool),
+        }).reset_index()
+
+    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
+        # proxy demand from weighted interaction events
+        if interaction_df.empty:
+            return np.zeros(self.product_catelogue_size, dtype=np.float32)
+        df = interaction_df
+        if exclude_sessions is not None:
+            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
+            df = df[~df["session_id"].isin(bad_sessions)]
+        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
+        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
+        prod = df["product_id"].to_numpy(dtype=int)
+        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
+        np.add.at(q_hat, prod, w)
+        return q_hat.astype(np.float32)
+
+    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
+        interaction_df = self._simulate_sessions(prices)
+        self._last_interaction_df = interaction_df
+        session_df = self._session_feature_table(interaction_df)
+
+        predicted_agent_sessions = None
+        if (self.use_defense and self.agent_detector is not None and not session_df.empty):
+            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
+
+        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
+        q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
+            if predicted_agent_sessions is not None else q_hat_naive.copy()
+
+        true_human = np.zeros(self.product_catelogue_size, dtype=float)
+        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
+        if not interaction_df.empty:
+            purchases = interaction_df[interaction_df["action"] == "purchase"]
+            if not purchases.empty:
+                for _, r in purchases.iterrows():
+                    if r["actor"] == "human":
+                        true_human[int(r["product_id"])] += 1.0
+                    else:
+                        true_agent[int(r["product_id"])] += 1.0
+
+        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
+        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
+        agent_loss = max(0.0, revenue_oracle - revenue_observed)
+
+        eps = 1e-6
+        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
+        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
+        interaction_features = self.compute_interaction_features(interaction_df)
+
+        summary = {
+            "prices": prices.copy(),
+            "interaction_df": interaction_df,
+            "session_df": session_df,
+            "q_hat_naive": q_hat_naive,
+            "q_hat_defended": q_hat_defended,
+            "true_human_demand": true_human.astype(np.float32),
+            "true_agent_purchases": true_agent.astype(np.float32),
+            "internal_error_naive": internal_error_naive.astype(np.float32),
+            "internal_error_defended": internal_error_def.astype(np.float32),
+            "interaction_features": interaction_features,
+            "revenue_observed": revenue_observed,
+            "revenue_oracle": revenue_oracle,
+            "agent_loss": agent_loss,
+            "predicted_agent_sessions": predicted_agent_sessions,
+        }
+        self.simulation_history.append(summary)
+        return summary
+
+    def get_interaction_data(self) -> np.ndarray:
+        if self._last_interaction_df.empty:
+            return np.array([], dtype=object)
+        return self._last_interaction_df.to_dict(orient="records")
+
+
+class PHANTOMEnv(gym.Env):
+    metadata = {"render_modes": []}
+
+    def __init__(self, use_defense: bool = False):
+        super().__init__()
+        self.constraints = BusinessLogicConstraints()
+        self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
+                                       high=self.constraints.max_price_adjustment,
+                                       shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
+        self.observation_space = spaces.Dict({
+            "elasticity": spaces.Dict({
+                "price": spaces.Box(
+                    low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
+                    high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
+                    dtype=np.float32),
+                "demand": spaces.Box(
+                    low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+                    high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
+                    dtype=np.float32),
+            })
+        })
+        self.commerce_platform = CommercePlatform(
+            product_catelogue_size=self.constraints.product_catelogue_size,
+            max_price=self.constraints.system_max_price,
+            min_price=self.constraints.system_min_price,
+            constraints=self.constraints,
+            agent_detector=simple_agent_detector,
+            use_defense=use_defense)
+        self._rng = np.random.default_rng(self.constraints.seed)
+        self.t = 0
+        self._prev_prices: Optional[np.ndarray] = None
+        self.state: Dict[str, Any] = {}
+
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
+        super().reset(seed=seed)
+        if seed is not None:
+            self._rng = np.random.default_rng(seed)
+            self.commerce_platform._rng = np.random.default_rng(seed)
+        self.t = 0
+        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
+        self._prev_prices = init_prices.copy()
+        self.state = {
+            "elasticity": {
+                "price": init_prices,
+                "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+            }
+        }
+        return self.state, {}
+
+    def step(self, action: np.ndarray):
+        self.t += 1
+        base_prices = self.state["elasticity"]["price"].astype(np.float32)
+        new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
+                           self.constraints.system_min_price,
+                           self.constraints.system_max_price).astype(np.float32)
+        result = self.commerce_platform.run_pricing_simulation(new_prices)
+
+        if self.commerce_platform.use_defense:
+            demand_est = result["q_hat_defended"]
+            internal_err = result["internal_error_defended"]
+        else:
+            demand_est = result["q_hat_naive"]
+            internal_err = result["internal_error_naive"]
+
+        self.state["elasticity"]["price"] = new_prices
+        self.state["elasticity"]["demand"] = demand_est
+
+        volatility = 0.0 if self._prev_prices is None else \
+            float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
+        self._prev_prices = new_prices.copy()
+
+        revenue_observed = float(result["revenue_observed"])
+        agent_loss = float(result["agent_loss"])
+        err_mean = float(np.mean(internal_err))
+
+        reward = (revenue_observed
+                 - self.constraints.w_agent_loss * agent_loss
+                 - self.constraints.w_volatility * volatility
+                 - self.constraints.w_estimation_error * err_mean)
+
+        terminated = self.t >= self.constraints.episode_length
+        info = {
+            "t": self.t,
+            "revenue_observed": revenue_observed,
+            "revenue_oracle": float(result["revenue_oracle"]),
+            "agent_loss": agent_loss,
+            "ux_volatility": volatility,
+            "mean_internal_error": err_mean,
+            "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
+            "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
+            "true_human_purchases_total": float(np.sum(result["true_human_demand"])),
+            "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
+        }
+        return self.state, float(reward), terminated, False, info
+
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+    from collections import defaultdict
+
+    runs = {}
+    for use_defense in (False, True):
+        env = PHANTOMEnv(use_defense=use_defense)
+        obs, _ = env.reset(seed=42)
+        metrics = defaultdict(list)
+        total_reward = 0.0
+        done = False
+
+        while not done:
+            action = env.action_space.sample()
+            obs, reward, done, _, info = env.step(action)
+            total_reward += reward
+            p_mean = float(np.mean(obs["elasticity"]["price"]))
+            q_mean = float(np.mean(obs["elasticity"]["demand"]))
+            p_std = float(np.std(obs["elasticity"]["price"]))
+
+            metrics['t'].append(info['t'])
+            metrics['price_mean'].append(p_mean)
+            metrics['price_std'].append(p_std)
+            metrics['demand_mean'].append(q_mean)
+            metrics['revenue_observed'].append(info['revenue_observed'])
+            metrics['revenue_oracle'].append(info['revenue_oracle'])
+            metrics['agent_loss'].append(info['agent_loss'])
+            metrics['ux_volatility'].append(info['ux_volatility'])
+            metrics['look_to_book'].append(info['look_to_book'])
+            metrics['reward'].append(reward)
+            metrics['human_purchases'].append(info['true_human_purchases_total'])
+            metrics['agent_purchases'].append(info['true_agent_purchases_total'])
+
+            if info['t'] % 20 == 0 or done:
+                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
+                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
+                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
+                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
+
+        runs[use_defense] = metrics
+        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
+
+    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
+    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
+
+    plot_configs = [
+        ('price_mean', 'Mean Price', 'Price'),
+        ('demand_mean', 'Mean Demand Estimate', 'Demand'),
+        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
+        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
+        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
+        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
+        ('reward', 'Step Reward', 'Reward'),
+        ('human_purchases', 'Human Purchases', 'Count'),
+        ('agent_purchases', 'Agent Purchases', 'Count'),
+    ]
+
+    for idx, (key, title, ylabel) in enumerate(plot_configs):
+        ax = axes[idx // 3, idx % 3]
+        for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
+            m = runs[use_defense]
+            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
+        ax.set_xlabel('Step')
+        ax.set_ylabel(ylabel)
+        ax.set_title(title, fontsize=10, fontweight='bold')
+        ax.legend(loc='best', fontsize=8)
+        ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
+    print("Plot saved to phantom_env_comparison.png")
+    plt.show()

From 20132c084c0056de8a1d5f4019138cc2eba1829c Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sun, 14 Dec 2025 17:30:01 +0100
Subject: [PATCH 02/35] initial environemnt definitions

---
 sim/rl/environment.py | 471 +++++-------------------------------------
 1 file changed, 50 insertions(+), 421 deletions(-)

diff --git a/sim/rl/environment.py b/sim/rl/environment.py
index 19f9ad4..803a4fd 100644
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -2,450 +2,79 @@ import gymnasium as gym
 from gymnasium import spaces
 import numpy as np
 from dataclasses import dataclass
-import pandas as pd
-from typing import Callable, Optional, Dict, Any, List
 
-# "learner"  agent learning to optimize pricing
-# "agent"  part of environment creating demand signals that learner processes
+# here when we say "learner" we mean the agent that is learning to optimize the pricing and "agent" is part of the envrionment where the agent is creating demand that that "learner" is processing"
 
 @dataclass
 class BusinessLogicConstraints():
-    max_price_adjustment: float = 0.30
-    system_max_price: float = 500.0
-    system_min_price: float = 1.0
-    product_catelogue_size: int = 100
-    episode_length: int = 200
-    sessions_per_step: int = 250
-    agent_share: float = 0.25
-    agent_recon_multiplier: float = 6.0
-    agent_purchase_probability: float = 0.20
-    coi_strength: float = 0.25
-    coi_threshold: float = 4.0
-    coi_sigmoid_temp: float = 1.25
-    base_human_demand: float = 0.08
-    base_agent_demand: float = 0.05
-    human_price_elasticity: float = -1.2
-    agent_price_elasticity: float = -0.6
-    w_agent_loss: float = 1.0
-    w_volatility: float = 5.0
-    w_estimation_error: float = 0.25
-    seed: int = 7
-
-
-def _sigmoid(x: np.ndarray) -> np.ndarray:
-    return 1.0 / (1.0 + np.exp(-x))
-
-
-def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
-    # baseline heuristic: high velocity + low conversion
-    v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index))
-    cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index))
-    total = session_df.get("total_interactions", pd.Series(0, index=session_df.index))
-    return (total >= 12) & (v >= 0.20) & (cr <= 0.01)
-
-
-class CommercePlatform:
-    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
-                 constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
-                 use_defense: bool = False):
-        self.product_catelogue_size = product_catelogue_size
-        self.max_price = max_price
-        self.min_price = min_price
-        self.constraints = constraints
-        self.use_defense = use_defense
-        self.agent_detector = agent_detector
-        self.simulation_history: List[Dict[str, Any]] = []
-        self._rng = np.random.default_rng(constraints.seed)
-        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
-        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
-        self._last_interaction_df: pd.DataFrame = pd.DataFrame()
-
-    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
-        # ground truth purchase propensities
-        p = np.clip(prices, self.min_price, self.max_price)
-        pn = p / self.max_price
-        human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
-        agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
-        return {
-            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
-            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
-        }
-
-    def _session_markup_multiplier(self, signal_score: float) -> float:
-        # session-based COI markup based on demand signal expression
-        x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
-        return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])
-
-    def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
-        demand = self.setup_true_demand(base_prices)
-        human_pprob = demand["human_purchase_prob"]
-        agent_pprob = demand["agent_purchase_prob"]
-        events: List[Dict[str, Any]] = []
-        T = self.constraints.sessions_per_step
-        n_agent_sessions = int(round(T * self.constraints.agent_share))
-        n_human_sessions = T - n_agent_sessions
-
-        # human sessions: normal browse with possible purchase
-        for s in range(n_human_sessions):
-            session_id = f"h_{len(events)}_{s}"
-            k = int(self._rng.integers(1, 4))
-            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
-            t = 0.0
-            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
-            signal_score = 0.0
-            purchased_any = False
-
-            for i, pid in enumerate(prod_ids):
-                t += float(inter_times[i])
-                price_shown = float(base_prices[pid])
-                events.append({
-                    "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
-                    "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
-                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                })
-                signal_score += 1.0
-
-                if self._rng.random() < 0.35:
-                    t += float(inter_times[i + k])
-                    events.append({
-                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
-                        "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
-                        "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                    })
-                    signal_score += 2.0
-
-                if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
-                    t += float(inter_times[i + 2 * k])
-                    mult = self._session_markup_multiplier(signal_score)
-                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
-                    events.append({
-                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
-                        "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
-                        "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
-                    })
-                    purchased_any = True
-
-        # agent sessions: split recon/purchase to circumvent COI
-        n_agent_ids = max(1, n_agent_sessions // 2)
-        for a in range(n_agent_ids):
-            agent_id = f"a_{a}"
-            recon_session_id = f"{agent_id}_recon"
-            t = 0.0
-            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
-            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
-            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
-            recon_signal = 0.0
-
-            for i, pid in enumerate(prod_ids):
-                t += float(inter_times[i])
-                events.append({
-                    "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
-                    "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
-                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                })
-                recon_signal += 1.0
-
-            # clean purchase session with minimal interactions
-            if self._rng.random() < self.constraints.agent_purchase_probability:
-                purchase_session_id = f"{agent_id}_clean"
-                pid = int(self._rng.integers(0, self.product_catelogue_size))
-                t2 = 0.0
-                clean_signal = 0.0
-                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
-                events.append({
-                    "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
-                    "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
-                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                })
-                clean_signal += 1.0
-
-                if self._rng.random() < float(agent_pprob[pid]):
-                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
-                    obs_mult = self._session_markup_multiplier(clean_signal)
-                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
-                    oracle_mult = self._session_markup_multiplier(recon_signal)  # oracle links recon->purchase
-                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
-                    events.append({
-                        "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
-                        "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
-                        "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
-                    })
-
-        return pd.DataFrame(events)
-
-    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
-        if interaction_df.empty:
-            return {"mean_sale_price": 0.0, "look_to_book": 0.0}
-        purchases = interaction_df[interaction_df["action"] == "purchase"]
-        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
-        views = float((interaction_df["action"] == "view").sum())
-        buys = float((interaction_df["action"] == "purchase").sum())
-        return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
-
-    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
-        if df.empty:
-            return pd.DataFrame()
-        g = df.groupby("session_id", sort=False)
-        session_duration = g["t"].max() - g["t"].min()
-        total_interactions = g.size()
-        avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
-        interaction_velocity = total_interactions / (session_duration + 1e-6)
-        views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False)
-        cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False)
-        purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False)
-        conversion_rate = purchases / (views + 1e-6)
-        is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False)
-
-        return pd.DataFrame({
-            "session_duration_sec": session_duration.astype(float),
-            "avg_time_between_events": avg_time_between.astype(float),
-            "total_interactions": total_interactions.astype(int),
-            "interaction_velocity": interaction_velocity.astype(float),
-            "item_views": views.astype(int),
-            "cart_adds": cart_adds.astype(int),
-            "purchases": purchases.astype(int),
-            "conversion_rate": conversion_rate.astype(float),
-            "is_agent": is_agent.astype(bool),
-        }).reset_index()
-
-    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
-        # proxy demand from weighted interaction events
-        if interaction_df.empty:
-            return np.zeros(self.product_catelogue_size, dtype=np.float32)
-        df = interaction_df
-        if exclude_sessions is not None:
-            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
-            df = df[~df["session_id"].isin(bad_sessions)]
-        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
-        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
-        prod = df["product_id"].to_numpy(dtype=int)
-        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
-        np.add.at(q_hat, prod, w)
-        return q_hat.astype(np.float32)
-
-    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
-        interaction_df = self._simulate_sessions(prices)
-        self._last_interaction_df = interaction_df
-        session_df = self._session_feature_table(interaction_df)
-
-        predicted_agent_sessions = None
-        if (self.use_defense and self.agent_detector is not None and not session_df.empty):
-            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
-
-        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
-        q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
-            if predicted_agent_sessions is not None else q_hat_naive.copy()
-
-        true_human = np.zeros(self.product_catelogue_size, dtype=float)
-        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
-        if not interaction_df.empty:
-            purchases = interaction_df[interaction_df["action"] == "purchase"]
-            if not purchases.empty:
-                for _, r in purchases.iterrows():
-                    if r["actor"] == "human":
-                        true_human[int(r["product_id"])] += 1.0
-                    else:
-                        true_agent[int(r["product_id"])] += 1.0
-
-        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
-        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
-        agent_loss = max(0.0, revenue_oracle - revenue_observed)
-
-        eps = 1e-6
-        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
-        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
-        interaction_features = self.compute_interaction_features(interaction_df)
-
-        summary = {
-            "prices": prices.copy(),
-            "interaction_df": interaction_df,
-            "session_df": session_df,
-            "q_hat_naive": q_hat_naive,
-            "q_hat_defended": q_hat_defended,
-            "true_human_demand": true_human.astype(np.float32),
-            "true_agent_purchases": true_agent.astype(np.float32),
-            "internal_error_naive": internal_error_naive.astype(np.float32),
-            "internal_error_defended": internal_error_def.astype(np.float32),
-            "interaction_features": interaction_features,
-            "revenue_observed": revenue_observed,
-            "revenue_oracle": revenue_oracle,
-            "agent_loss": agent_loss,
-            "predicted_agent_sessions": predicted_agent_sessions,
-        }
-        self.simulation_history.append(summary)
-        return summary
-
-    def get_interaction_data(self) -> np.ndarray:
-        if self._last_interaction_df.empty:
-            return np.array([], dtype=object)
-        return self._last_interaction_df.to_dict(orient="records")
+    max_price_adjustment : float = 0.3 # maximum adjustment of price
+    system_max_price : float = 500.0 # maximum price allowed in the system
+    product_catelogue_size : int = 100 # number of products in the catalogue
 
 
 class PHANTOMEnv(gym.Env):
-    metadata = {"render_modes": []}
-
-    def __init__(self, use_defense: bool = False):
-        super().__init__()
+    def __init__(self):
+        super(PHANTOMEnv, self).__init__()
         self.constraints = BusinessLogicConstraints()
-        self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
-                                       high=self.constraints.max_price_adjustment,
-                                       shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
+        self.action_space = spaces.Box(
+            low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment,
+            shape=(1,), dtype=np.float32) #  we allow teh learner to adjust price by some BusinessLogicConstraints factor
+        # Example for using image as input:
         self.observation_space = spaces.Dict({
-            "elasticity": spaces.Dict({
-                "price": spaces.Box(
-                    low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
-                    high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
-                    dtype=np.float32),
-                "demand": spaces.Box(
-                    low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
-                    high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
-                    dtype=np.float32),
+            'elasticity': spaces.Dict({
+                'price': spaces.Box(low=0, high=self.constraints.system_max_price,
+                                    shape=(self.constraints.product_catelogue_size,), dtype=np.float32),
+                'demand': spaces.Box(low=0, high=np.inf,
+                                     shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
             })
         })
-        self.commerce_platform = CommercePlatform(
-            product_catelogue_size=self.constraints.product_catelogue_size,
-            max_price=self.constraints.system_max_price,
-            min_price=self.constraints.system_min_price,
-            constraints=self.constraints,
-            agent_detector=simple_agent_detector,
-            use_defense=use_defense)
-        self._rng = np.random.default_rng(self.constraints.seed)
-        self.t = 0
-        self._prev_prices: Optional[np.ndarray] = None
-        self.state: Dict[str, Any] = {}
 
-    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
+    def reset(self, seed=None, options=None):
         super().reset(seed=seed)
-        if seed is not None:
-            self._rng = np.random.default_rng(seed)
-            self.commerce_platform._rng = np.random.default_rng(seed)
-        self.t = 0
-        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
-        self._prev_prices = init_prices.copy()
+        # Initialize state
         self.state = {
-            "elasticity": {
-                "price": init_prices,
-                "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
-            }
+            'price': 100.0,  # base price
+            'demand': 0.0
         }
         return self.state, {}
 
-    def step(self, action: np.ndarray):
-        self.t += 1
-        base_prices = self.state["elasticity"]["price"].astype(np.float32)
-        new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
-                           self.constraints.system_min_price,
-                           self.constraints.system_max_price).astype(np.float32)
-        result = self.commerce_platform.run_pricing_simulation(new_prices)
+    def step(self, action):
+        # Apply action
+        price_adjustment = action[0]
+        new_price = self.state['price'] * (1 + price_adjustment)
+        self.state['price'] = new_price
 
-        if self.commerce_platform.use_defense:
-            demand_est = result["q_hat_defended"]
-            internal_err = result["internal_error_defended"]
-        else:
-            demand_est = result["q_hat_naive"]
-            internal_err = result["internal_error_naive"]
+        # Simulate demand based on new price
+        demand = self.simulate_demand(new_price)
+        self.state['demand'] = demand
 
-        self.state["elasticity"]["price"] = new_prices
-        self.state["elasticity"]["demand"] = demand_est
+        # Calculate reward (e.g., revenue)
+        reward = new_price * demand
 
-        volatility = 0.0 if self._prev_prices is None else \
-            float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
-        self._prev_prices = new_prices.copy()
+        # Check if episode is done
+        done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0
 
-        revenue_observed = float(result["revenue_observed"])
-        agent_loss = float(result["agent_loss"])
-        err_mean = float(np.mean(internal_err))
-
-        reward = (revenue_observed
-                 - self.constraints.w_agent_loss * agent_loss
-                 - self.constraints.w_volatility * volatility
-                 - self.constraints.w_estimation_error * err_mean)
-
-        terminated = self.t >= self.constraints.episode_length
-        info = {
-            "t": self.t,
-            "revenue_observed": revenue_observed,
-            "revenue_oracle": float(result["revenue_oracle"]),
-            "agent_loss": agent_loss,
-            "ux_volatility": volatility,
-            "mean_internal_error": err_mean,
-            "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
-            "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
-            "true_human_purchases_total": float(np.sum(result["true_human_demand"])),
-            "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
-        }
-        return self.state, float(reward), terminated, False, info
 
+        return self.state, reward, done, False, {}
+    def simulate_demand(self, price):
+        # Simple linear demand model: demand decreases as price increases
+        base_demand = 200
+        price_sensitivity = 0.5
+        demand = max(0, base_demand - price_sensitivity * price)
+        return demand
 
 if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-    from collections import defaultdict
+    env = PHANTOMEnv()
+    obs, _ = env.reset()
+    done = False
+    total_reward = 0
 
-    runs = {}
-    for use_defense in (False, True):
-        env = PHANTOMEnv(use_defense=use_defense)
-        obs, _ = env.reset(seed=42)
-        metrics = defaultdict(list)
-        total_reward = 0.0
-        done = False
+    while not done:
+        action = env.action_space.sample()  # Random action
+        obs, reward, done, _, _ = env.step(action)
+        total_reward += reward
+        print(f"Price: {obs['price']:.2f}, Demand: {obs['demand']:.2f}, Reward: {reward:.2f}")
+        if done:
+            break
 
-        while not done:
-            action = env.action_space.sample()
-            obs, reward, done, _, info = env.step(action)
-            total_reward += reward
-            p_mean = float(np.mean(obs["elasticity"]["price"]))
-            q_mean = float(np.mean(obs["elasticity"]["demand"]))
-            p_std = float(np.std(obs["elasticity"]["price"]))
-
-            metrics['t'].append(info['t'])
-            metrics['price_mean'].append(p_mean)
-            metrics['price_std'].append(p_std)
-            metrics['demand_mean'].append(q_mean)
-            metrics['revenue_observed'].append(info['revenue_observed'])
-            metrics['revenue_oracle'].append(info['revenue_oracle'])
-            metrics['agent_loss'].append(info['agent_loss'])
-            metrics['ux_volatility'].append(info['ux_volatility'])
-            metrics['look_to_book'].append(info['look_to_book'])
-            metrics['reward'].append(reward)
-            metrics['human_purchases'].append(info['true_human_purchases_total'])
-            metrics['agent_purchases'].append(info['true_agent_purchases_total'])
-
-            if info['t'] % 20 == 0 or done:
-                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
-                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
-                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
-                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
-
-        runs[use_defense] = metrics
-        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
-
-    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
-    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
-
-    plot_configs = [
-        ('price_mean', 'Mean Price', 'Price'),
-        ('demand_mean', 'Mean Demand Estimate', 'Demand'),
-        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
-        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
-        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
-        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
-        ('reward', 'Step Reward', 'Reward'),
-        ('human_purchases', 'Human Purchases', 'Count'),
-        ('agent_purchases', 'Agent Purchases', 'Count'),
-    ]
-
-    for idx, (key, title, ylabel) in enumerate(plot_configs):
-        ax = axes[idx // 3, idx % 3]
-        for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
-            m = runs[use_defense]
-            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
-        ax.set_xlabel('Step')
-        ax.set_ylabel(ylabel)
-        ax.set_title(title, fontsize=10, fontweight='bold')
-        ax.legend(loc='best', fontsize=8)
-        ax.grid(True, alpha=0.3)
-
-    plt.tight_layout()
-    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
-    print("Plot saved to phantom_env_comparison.png")
-    plt.show()
+    print(f"Total Reward: {total_reward:.2f}")

From 7d09232e48072598e7bf7bab46749fba47f00720 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sun, 14 Dec 2025 17:53:48 +0100
Subject: [PATCH 03/35] high level definition

---
 sim/rl/environment.py | 94 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 11 deletions(-)

diff --git a/sim/rl/environment.py b/sim/rl/environment.py
index 803a4fd..a09438f 100644
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -2,6 +2,7 @@ import gymnasium as gym
 from gymnasium import spaces
 import numpy as np
 from dataclasses import dataclass
+import pandas as pd
 
 # here when we say "learner" we mean the agent that is learning to optimize the pricing and "agent" is part of the envrionment where the agent is creating demand that that "learner" is processing"
 
@@ -9,17 +10,89 @@ from dataclasses import dataclass
 class BusinessLogicConstraints():
     max_price_adjustment : float = 0.3 # maximum adjustment of price
     system_max_price : float = 500.0 # maximum price allowed in the system
+    system_min_price : float = 1.0 # minimum price allowed in the system
     product_catelogue_size : int = 100 # number of products in the catalogue
 
 
+class CommercePlatform:
+    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float):
+        self.product_catelogue_size = product_catelogue_size
+        self.max_price = max_price
+        self.min_price = min_price
+        self.simulation_history = []
+
+
+    def setup_true_demand(self,prices: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+        human_price_elasticity = -1.5  # Example elasticity value
+        base_demand = 100  # Base demand for products
+        demand = base_demand * (prices / self.max_price) ** human_price_elasticity
+
+        agent_price_elasticity = -2.0  # Example elasticity value for agents
+        agent_base_demand = 150  # Base demand for agents
+        agent_demand = agent_base_demand * (prices / self.max_price) ** agent_price_elasticity
+
+        return demand + agent_demand, agent_demand
+
+
+    def compute_interaction_features(self, interaction_data: np.ndarray) -> dict:
+        df = pd.DataFrame(interaction_data)
+        return {
+            'mean_sale_price': df[df['action'] == 'purchase']['price'].mean(),
+        }
+
+    def run_pricing_simulation(self, prices: np.ndarray) -> np.ndarray:
+        # Simulate demand based on prices
+
+        observed_demand, demand_from_agents = self.setup_true_demand(prices)
+        true_demand = observed_demand - demand_from_agents
+
+        interaction_data = self.get_interaction_data()
+        interaction_features = self.compute_interaction_features(interaction_data)
+        demand_estimates = self.demand_estimate(interaction_data)
+        internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6)
+
+        self.simulation_history.append(
+            {
+                'prices': prices,
+                'true_demand': true_demand,
+                'demand_estimates': demand_estimates,
+                'internal_error': internal_error,
+                'interaction_data': interaction_data,
+                'interaction_features': interaction_features
+            })
+        return np.array(interaction_data)
+
+    def get_interaction_data(self) -> np.ndarray:
+        # Simulate interaction data
+        interaction_data = []
+        return np.array(interaction_data)
+
+
+    def demand_estimate(self, interactions : np.ndarray) -> np.ndarray:
+        demand_estimates = np.random.rand(self.product_catelogue_size) * 100  # Dummy demand estimates
+        return demand_estimates
+
+
+
+
+
+
+
+
+
 class PHANTOMEnv(gym.Env):
     def __init__(self):
         super(PHANTOMEnv, self).__init__()
         self.constraints = BusinessLogicConstraints()
         self.action_space = spaces.Box(
             low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment,
-            shape=(1,), dtype=np.float32) #  we allow teh learner to adjust price by some BusinessLogicConstraints factor
+            shape=(self.constraints.product_catelogue_size,), dtype=np.float32) #  we allow teh learner to adjust price by some BusinessLogicConstraints factor
         # Example for using image as input:
+        self.commerce_platform = CommercePlatform(
+            product_catelogue_size=self.constraints.product_catelogue_size,
+            max_price=self.constraints.system_max_price,
+            min_price=self.constraints.system_min_price
+        )
         self.observation_space = spaces.Dict({
             'elasticity': spaces.Dict({
                 'price': spaces.Box(low=0, high=self.constraints.system_max_price,
@@ -29,24 +102,23 @@ class PHANTOMEnv(gym.Env):
             })
         })
 
-    def reset(self, seed=None, options=None):
+    def reset(self, seed :int, options) -> tuple[dict, dict]:
         super().reset(seed=seed)
         # Initialize state
         self.state = {
-            'price': 100.0,  # base price
-            'demand': 0.0
+            'elasticity': {
+                'price': np.full((self.constraints.product_catelogue_size,), 100.0, dtype=np.float32),
+                'demand': np.full((self.constraints.product_catelogue_size,), 50.0, dtype=np.float32)
+            }
         }
         return self.state, {}
 
     def step(self, action):
-        # Apply action
-        price_adjustment = action[0]
-        new_price = self.state['price'] * (1 + price_adjustment)
-        self.state['price'] = new_price
+        self.state['price'] = np.clip(self.state['price'] * (1 + action),
+                            self.constraints.system_min_price,
+                            self.constraints.system_max_price)
+
 
-        # Simulate demand based on new price
-        demand = self.simulate_demand(new_price)
-        self.state['demand'] = demand
 
         # Calculate reward (e.g., revenue)
         reward = new_price * demand

From 8a084584786bfbaff6b913ccf7d8af3c0804349d Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sun, 14 Dec 2025 17:59:34 +0100
Subject: [PATCH 04/35] formulating the reward simply

---
 sim/rl/environment.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/sim/rl/environment.py b/sim/rl/environment.py
index a09438f..ca7159b 100644
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -40,7 +40,7 @@ class CommercePlatform:
             'mean_sale_price': df[df['action'] == 'purchase']['price'].mean(),
         }
 
-    def run_pricing_simulation(self, prices: np.ndarray) -> np.ndarray:
+    def run_pricing_simulation(self, prices: np.ndarray) -> dict:
         # Simulate demand based on prices
 
         observed_demand, demand_from_agents = self.setup_true_demand(prices)
@@ -51,16 +51,17 @@ class CommercePlatform:
         demand_estimates = self.demand_estimate(interaction_data)
         internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6)
 
-        self.simulation_history.append(
-            {
+
+        summary = {
                 'prices': prices,
                 'true_demand': true_demand,
                 'demand_estimates': demand_estimates,
                 'internal_error': internal_error,
                 'interaction_data': interaction_data,
                 'interaction_features': interaction_features
-            })
-        return np.array(interaction_data)
+            }
+        self.simulation_history.append(summary)
+        return summary
 
     def get_interaction_data(self) -> np.ndarray:
         # Simulate interaction data
@@ -118,10 +119,24 @@ class PHANTOMEnv(gym.Env):
                             self.constraints.system_min_price,
                             self.constraints.system_max_price)
 
+        result = self.commerce_platform.run_pricing_simulation(self.state['price'])
+        history = self.commerce_platform.simulation_history
+        self.state['demand'] = result['demand_estimates']
+
+
+
+        reward = sum(
+            self.state['price'] * self.state['demand'],
+            # performance historically, to take into account business kpi trends (using features from interaction data)
+            sum(
+                [-0.05 * i * history[-1]['internal_error'] for i in range(1, len(history))],
+            ) if len(history) > 1 else 0,
+            sum(
+                [0.1 * history[-1]['interaction_features']['mean_sale_price'] - 0.1 * history[i]['interaction_features']['mean_sale_price'] for i in range(len(history)-1)],
+            ) if len(history) > 1 else 0
+        )
 
 
-        # Calculate reward (e.g., revenue)
-        reward = new_price * demand
 
         # Check if episode is done
         done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0

From 201c98bcacd2420ed45c52a583383618b26d46dc Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sun, 14 Dec 2025 18:59:02 +0100
Subject: [PATCH 05/35] improved implementation

---
 sim/rl/environment.py | 512 ++++++++++++++++++++++++++++++++----------
 1 file changed, 398 insertions(+), 114 deletions(-)

diff --git a/sim/rl/environment.py b/sim/rl/environment.py
index ca7159b..19f9ad4 100644
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -3,165 +3,449 @@ from gymnasium import spaces
 import numpy as np
 from dataclasses import dataclass
 import pandas as pd
+from typing import Callable, Optional, Dict, Any, List
 
-# here when we say "learner" we mean the agent that is learning to optimize the pricing and "agent" is part of the envrionment where the agent is creating demand that that "learner" is processing"
+# "learner"  agent learning to optimize pricing
+# "agent"  part of environment creating demand signals that learner processes
 
 @dataclass
 class BusinessLogicConstraints():
-    max_price_adjustment : float = 0.3 # maximum adjustment of price
-    system_max_price : float = 500.0 # maximum price allowed in the system
-    system_min_price : float = 1.0 # minimum price allowed in the system
-    product_catelogue_size : int = 100 # number of products in the catalogue
+    max_price_adjustment: float = 0.30
+    system_max_price: float = 500.0
+    system_min_price: float = 1.0
+    product_catelogue_size: int = 100
+    episode_length: int = 200
+    sessions_per_step: int = 250
+    agent_share: float = 0.25
+    agent_recon_multiplier: float = 6.0
+    agent_purchase_probability: float = 0.20
+    coi_strength: float = 0.25
+    coi_threshold: float = 4.0
+    coi_sigmoid_temp: float = 1.25
+    base_human_demand: float = 0.08
+    base_agent_demand: float = 0.05
+    human_price_elasticity: float = -1.2
+    agent_price_elasticity: float = -0.6
+    w_agent_loss: float = 1.0
+    w_volatility: float = 5.0
+    w_estimation_error: float = 0.25
+    seed: int = 7
+
+
+def _sigmoid(x: np.ndarray) -> np.ndarray:
+    return 1.0 / (1.0 + np.exp(-x))
+
+
+def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
+    # baseline heuristic: high velocity + low conversion
+    v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index))
+    cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index))
+    total = session_df.get("total_interactions", pd.Series(0, index=session_df.index))
+    return (total >= 12) & (v >= 0.20) & (cr <= 0.01)
 
 
 class CommercePlatform:
-    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float):
+    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
+                 constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
+                 use_defense: bool = False):
         self.product_catelogue_size = product_catelogue_size
         self.max_price = max_price
         self.min_price = min_price
-        self.simulation_history = []
+        self.constraints = constraints
+        self.use_defense = use_defense
+        self.agent_detector = agent_detector
+        self.simulation_history: List[Dict[str, Any]] = []
+        self._rng = np.random.default_rng(constraints.seed)
+        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
+        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
+        self._last_interaction_df: pd.DataFrame = pd.DataFrame()
 
-
-    def setup_true_demand(self,prices: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-        human_price_elasticity = -1.5  # Example elasticity value
-        base_demand = 100  # Base demand for products
-        demand = base_demand * (prices / self.max_price) ** human_price_elasticity
-
-        agent_price_elasticity = -2.0  # Example elasticity value for agents
-        agent_base_demand = 150  # Base demand for agents
-        agent_demand = agent_base_demand * (prices / self.max_price) ** agent_price_elasticity
-
-        return demand + agent_demand, agent_demand
-
-
-    def compute_interaction_features(self, interaction_data: np.ndarray) -> dict:
-        df = pd.DataFrame(interaction_data)
+    def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
+        # ground truth purchase propensities
+        p = np.clip(prices, self.min_price, self.max_price)
+        pn = p / self.max_price
+        human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
+        agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
         return {
-            'mean_sale_price': df[df['action'] == 'purchase']['price'].mean(),
+            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
+            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
         }
 
-    def run_pricing_simulation(self, prices: np.ndarray) -> dict:
-        # Simulate demand based on prices
+    def _session_markup_multiplier(self, signal_score: float) -> float:
+        # session-based COI markup based on demand signal expression
+        x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
+        return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])
 
-        observed_demand, demand_from_agents = self.setup_true_demand(prices)
-        true_demand = observed_demand - demand_from_agents
+    def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
+        demand = self.setup_true_demand(base_prices)
+        human_pprob = demand["human_purchase_prob"]
+        agent_pprob = demand["agent_purchase_prob"]
+        events: List[Dict[str, Any]] = []
+        T = self.constraints.sessions_per_step
+        n_agent_sessions = int(round(T * self.constraints.agent_share))
+        n_human_sessions = T - n_agent_sessions
 
-        interaction_data = self.get_interaction_data()
-        interaction_features = self.compute_interaction_features(interaction_data)
-        demand_estimates = self.demand_estimate(interaction_data)
-        internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6)
+        # human sessions: normal browse with possible purchase
+        for s in range(n_human_sessions):
+            session_id = f"h_{len(events)}_{s}"
+            k = int(self._rng.integers(1, 4))
+            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
+            t = 0.0
+            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
+            signal_score = 0.0
+            purchased_any = False
 
+            for i, pid in enumerate(prod_ids):
+                t += float(inter_times[i])
+                price_shown = float(base_prices[pid])
+                events.append({
+                    "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                    "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                signal_score += 1.0
+
+                if self._rng.random() < 0.35:
+                    t += float(inter_times[i + k])
+                    events.append({
+                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                        "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
+                        "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                    })
+                    signal_score += 2.0
+
+                if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
+                    t += float(inter_times[i + 2 * k])
+                    mult = self._session_markup_multiplier(signal_score)
+                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
+                    events.append({
+                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
+                        "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
+                        "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
+                    })
+                    purchased_any = True
+
+        # agent sessions: split recon/purchase to circumvent COI
+        n_agent_ids = max(1, n_agent_sessions // 2)
+        for a in range(n_agent_ids):
+            agent_id = f"a_{a}"
+            recon_session_id = f"{agent_id}_recon"
+            t = 0.0
+            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
+            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
+            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
+            recon_signal = 0.0
+
+            for i, pid in enumerate(prod_ids):
+                t += float(inter_times[i])
+                events.append({
+                    "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
+                    "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                recon_signal += 1.0
+
+            # clean purchase session with minimal interactions
+            if self._rng.random() < self.constraints.agent_purchase_probability:
+                purchase_session_id = f"{agent_id}_clean"
+                pid = int(self._rng.integers(0, self.product_catelogue_size))
+                t2 = 0.0
+                clean_signal = 0.0
+                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
+                events.append({
+                    "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
+                    "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
+                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
+                })
+                clean_signal += 1.0
+
+                if self._rng.random() < float(agent_pprob[pid]):
+                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
+                    obs_mult = self._session_markup_multiplier(clean_signal)
+                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
+                    oracle_mult = self._session_markup_multiplier(recon_signal)  # oracle links recon->purchase
+                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
+                    events.append({
+                        "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
+                        "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
+                        "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
+                    })
+
+        return pd.DataFrame(events)
+
+    def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
+        if interaction_df.empty:
+            return {"mean_sale_price": 0.0, "look_to_book": 0.0}
+        purchases = interaction_df[interaction_df["action"] == "purchase"]
+        mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
+        views = float((interaction_df["action"] == "view").sum())
+        buys = float((interaction_df["action"] == "purchase").sum())
+        return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
+
+    def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
+        if df.empty:
+            return pd.DataFrame()
+        g = df.groupby("session_id", sort=False)
+        session_duration = g["t"].max() - g["t"].min()
+        total_interactions = g.size()
+        avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
+        interaction_velocity = total_interactions / (session_duration + 1e-6)
+        views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False)
+        cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False)
+        purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False)
+        conversion_rate = purchases / (views + 1e-6)
+        is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False)
+
+        return pd.DataFrame({
+            "session_duration_sec": session_duration.astype(float),
+            "avg_time_between_events": avg_time_between.astype(float),
+            "total_interactions": total_interactions.astype(int),
+            "interaction_velocity": interaction_velocity.astype(float),
+            "item_views": views.astype(int),
+            "cart_adds": cart_adds.astype(int),
+            "purchases": purchases.astype(int),
+            "conversion_rate": conversion_rate.astype(float),
+            "is_agent": is_agent.astype(bool),
+        }).reset_index()
+
+    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
+        # proxy demand from weighted interaction events
+        if interaction_df.empty:
+            return np.zeros(self.product_catelogue_size, dtype=np.float32)
+        df = interaction_df
+        if exclude_sessions is not None:
+            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
+            df = df[~df["session_id"].isin(bad_sessions)]
+        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
+        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
+        prod = df["product_id"].to_numpy(dtype=int)
+        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
+        np.add.at(q_hat, prod, w)
+        return q_hat.astype(np.float32)
+
+    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
+        interaction_df = self._simulate_sessions(prices)
+        self._last_interaction_df = interaction_df
+        session_df = self._session_feature_table(interaction_df)
+
+        predicted_agent_sessions = None
+        if (self.use_defense and self.agent_detector is not None and not session_df.empty):
+            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
+
+        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
+        q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
+            if predicted_agent_sessions is not None else q_hat_naive.copy()
+
+        true_human = np.zeros(self.product_catelogue_size, dtype=float)
+        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
+        if not interaction_df.empty:
+            purchases = interaction_df[interaction_df["action"] == "purchase"]
+            if not purchases.empty:
+                for _, r in purchases.iterrows():
+                    if r["actor"] == "human":
+                        true_human[int(r["product_id"])] += 1.0
+                    else:
+                        true_agent[int(r["product_id"])] += 1.0
+
+        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
+        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
+        agent_loss = max(0.0, revenue_oracle - revenue_observed)
+
+        eps = 1e-6
+        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
+        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
+        interaction_features = self.compute_interaction_features(interaction_df)
 
         summary = {
-                'prices': prices,
-                'true_demand': true_demand,
-                'demand_estimates': demand_estimates,
-                'internal_error': internal_error,
-                'interaction_data': interaction_data,
-                'interaction_features': interaction_features
-            }
+            "prices": prices.copy(),
+            "interaction_df": interaction_df,
+            "session_df": session_df,
+            "q_hat_naive": q_hat_naive,
+            "q_hat_defended": q_hat_defended,
+            "true_human_demand": true_human.astype(np.float32),
+            "true_agent_purchases": true_agent.astype(np.float32),
+            "internal_error_naive": internal_error_naive.astype(np.float32),
+            "internal_error_defended": internal_error_def.astype(np.float32),
+            "interaction_features": interaction_features,
+            "revenue_observed": revenue_observed,
+            "revenue_oracle": revenue_oracle,
+            "agent_loss": agent_loss,
+            "predicted_agent_sessions": predicted_agent_sessions,
+        }
         self.simulation_history.append(summary)
         return summary
 
     def get_interaction_data(self) -> np.ndarray:
-        # Simulate interaction data
-        interaction_data = []
-        return np.array(interaction_data)
-
-
-    def demand_estimate(self, interactions : np.ndarray) -> np.ndarray:
-        demand_estimates = np.random.rand(self.product_catelogue_size) * 100  # Dummy demand estimates
-        return demand_estimates
-
-
-
-
-
-
-
+        if self._last_interaction_df.empty:
+            return np.array([], dtype=object)
+        return self._last_interaction_df.to_dict(orient="records")
 
 
 class PHANTOMEnv(gym.Env):
-    def __init__(self):
-        super(PHANTOMEnv, self).__init__()
+    metadata = {"render_modes": []}
+
+    def __init__(self, use_defense: bool = False):
+        super().__init__()
         self.constraints = BusinessLogicConstraints()
-        self.action_space = spaces.Box(
-            low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment,
-            shape=(self.constraints.product_catelogue_size,), dtype=np.float32) #  we allow teh learner to adjust price by some BusinessLogicConstraints factor
-        # Example for using image as input:
+        self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
+                                       high=self.constraints.max_price_adjustment,
+                                       shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
+        self.observation_space = spaces.Dict({
+            "elasticity": spaces.Dict({
+                "price": spaces.Box(
+                    low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
+                    high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
+                    dtype=np.float32),
+                "demand": spaces.Box(
+                    low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+                    high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
+                    dtype=np.float32),
+            })
+        })
         self.commerce_platform = CommercePlatform(
             product_catelogue_size=self.constraints.product_catelogue_size,
             max_price=self.constraints.system_max_price,
-            min_price=self.constraints.system_min_price
-        )
-        self.observation_space = spaces.Dict({
-            'elasticity': spaces.Dict({
-                'price': spaces.Box(low=0, high=self.constraints.system_max_price,
-                                    shape=(self.constraints.product_catelogue_size,), dtype=np.float32),
-                'demand': spaces.Box(low=0, high=np.inf,
-                                     shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
-            })
-        })
+            min_price=self.constraints.system_min_price,
+            constraints=self.constraints,
+            agent_detector=simple_agent_detector,
+            use_defense=use_defense)
+        self._rng = np.random.default_rng(self.constraints.seed)
+        self.t = 0
+        self._prev_prices: Optional[np.ndarray] = None
+        self.state: Dict[str, Any] = {}
 
-    def reset(self, seed :int, options) -> tuple[dict, dict]:
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
         super().reset(seed=seed)
-        # Initialize state
+        if seed is not None:
+            self._rng = np.random.default_rng(seed)
+            self.commerce_platform._rng = np.random.default_rng(seed)
+        self.t = 0
+        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
+        self._prev_prices = init_prices.copy()
         self.state = {
-            'elasticity': {
-                'price': np.full((self.constraints.product_catelogue_size,), 100.0, dtype=np.float32),
-                'demand': np.full((self.constraints.product_catelogue_size,), 50.0, dtype=np.float32)
+            "elasticity": {
+                "price": init_prices,
+                "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
             }
         }
         return self.state, {}
 
-    def step(self, action):
-        self.state['price'] = np.clip(self.state['price'] * (1 + action),
-                            self.constraints.system_min_price,
-                            self.constraints.system_max_price)
+    def step(self, action: np.ndarray):
+        self.t += 1
+        base_prices = self.state["elasticity"]["price"].astype(np.float32)
+        new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
+                           self.constraints.system_min_price,
+                           self.constraints.system_max_price).astype(np.float32)
+        result = self.commerce_platform.run_pricing_simulation(new_prices)
 
-        result = self.commerce_platform.run_pricing_simulation(self.state['price'])
-        history = self.commerce_platform.simulation_history
-        self.state['demand'] = result['demand_estimates']
+        if self.commerce_platform.use_defense:
+            demand_est = result["q_hat_defended"]
+            internal_err = result["internal_error_defended"]
+        else:
+            demand_est = result["q_hat_naive"]
+            internal_err = result["internal_error_naive"]
 
+        self.state["elasticity"]["price"] = new_prices
+        self.state["elasticity"]["demand"] = demand_est
 
+        volatility = 0.0 if self._prev_prices is None else \
+            float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
+        self._prev_prices = new_prices.copy()
 
-        reward = sum(
-            self.state['price'] * self.state['demand'],
-            # performance historically, to take into account business kpi trends (using features from interaction data)
-            sum(
-                [-0.05 * i * history[-1]['internal_error'] for i in range(1, len(history))],
-            ) if len(history) > 1 else 0,
-            sum(
-                [0.1 * history[-1]['interaction_features']['mean_sale_price'] - 0.1 * history[i]['interaction_features']['mean_sale_price'] for i in range(len(history)-1)],
-            ) if len(history) > 1 else 0
-        )
+        revenue_observed = float(result["revenue_observed"])
+        agent_loss = float(result["agent_loss"])
+        err_mean = float(np.mean(internal_err))
 
+        reward = (revenue_observed
+                 - self.constraints.w_agent_loss * agent_loss
+                 - self.constraints.w_volatility * volatility
+                 - self.constraints.w_estimation_error * err_mean)
 
+        terminated = self.t >= self.constraints.episode_length
+        info = {
+            "t": self.t,
+            "revenue_observed": revenue_observed,
+            "revenue_oracle": float(result["revenue_oracle"]),
+            "agent_loss": agent_loss,
+            "ux_volatility": volatility,
+            "mean_internal_error": err_mean,
+            "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
+            "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
+            "true_human_purchases_total": float(np.sum(result["true_human_demand"])),
+            "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
+        }
+        return self.state, float(reward), terminated, False, info
 
-        # Check if episode is done
-        done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0
-
-
-        return self.state, reward, done, False, {}
-    def simulate_demand(self, price):
-        # Simple linear demand model: demand decreases as price increases
-        base_demand = 200
-        price_sensitivity = 0.5
-        demand = max(0, base_demand - price_sensitivity * price)
-        return demand
 
 if __name__ == "__main__":
-    env = PHANTOMEnv()
-    obs, _ = env.reset()
-    done = False
-    total_reward = 0
+    import matplotlib.pyplot as plt
+    from collections import defaultdict
 
-    while not done:
-        action = env.action_space.sample()  # Random action
-        obs, reward, done, _, _ = env.step(action)
-        total_reward += reward
-        print(f"Price: {obs['price']:.2f}, Demand: {obs['demand']:.2f}, Reward: {reward:.2f}")
-        if done:
-            break
+    runs = {}
+    for use_defense in (False, True):
+        env = PHANTOMEnv(use_defense=use_defense)
+        obs, _ = env.reset(seed=42)
+        metrics = defaultdict(list)
+        total_reward = 0.0
+        done = False
 
-    print(f"Total Reward: {total_reward:.2f}")
+        while not done:
+            action = env.action_space.sample()
+            obs, reward, done, _, info = env.step(action)
+            total_reward += reward
+            p_mean = float(np.mean(obs["elasticity"]["price"]))
+            q_mean = float(np.mean(obs["elasticity"]["demand"]))
+            p_std = float(np.std(obs["elasticity"]["price"]))
+
+            metrics['t'].append(info['t'])
+            metrics['price_mean'].append(p_mean)
+            metrics['price_std'].append(p_std)
+            metrics['demand_mean'].append(q_mean)
+            metrics['revenue_observed'].append(info['revenue_observed'])
+            metrics['revenue_oracle'].append(info['revenue_oracle'])
+            metrics['agent_loss'].append(info['agent_loss'])
+            metrics['ux_volatility'].append(info['ux_volatility'])
+            metrics['look_to_book'].append(info['look_to_book'])
+            metrics['reward'].append(reward)
+            metrics['human_purchases'].append(info['true_human_purchases_total'])
+            metrics['agent_purchases'].append(info['true_agent_purchases_total'])
+
+            if info['t'] % 20 == 0 or done:
+                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
+                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
+                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
+                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
+
+        runs[use_defense] = metrics
+        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
+
+    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
+    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
+
+    plot_configs = [
+        ('price_mean', 'Mean Price', 'Price'),
+        ('demand_mean', 'Mean Demand Estimate', 'Demand'),
+        ('revenue_observed', 'Revenue (Observed)', 'Revenue'),
+        ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
+        ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
+        ('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
+        ('reward', 'Step Reward', 'Reward'),
+        ('human_purchases', 'Human Purchases', 'Count'),
+        ('agent_purchases', 'Agent Purchases', 'Count'),
+    ]
+
+    for idx, (key, title, ylabel) in enumerate(plot_configs):
+        ax = axes[idx // 3, idx % 3]
+        for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
+            m = runs[use_defense]
+            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
+        ax.set_xlabel('Step')
+        ax.set_ylabel(ylabel)
+        ax.set_title(title, fontsize=10, fontweight='bold')
+        ax.legend(loc='best', fontsize=8)
+        ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
+    print("Plot saved to phantom_env_comparison.png")
+    plt.show()

From 3fa98f375df31eb23fe5a43116cf4c1535ce706d Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 17 Dec 2025 17:41:16 +0100
Subject: [PATCH 06/35] refactor to align more with research in the env sims

---
 sim/rl/engine.py      | 220 ++++++++++++++++++++++++++++++++++++
 sim/rl/environment.py | 255 ++++++++++--------------------------------
 sim/rl/train.py       | 149 ++++++++++++++++++++++++
 3 files changed, 431 insertions(+), 193 deletions(-)
 create mode 100644 sim/rl/engine.py
 create mode 100644 sim/rl/train.py

diff --git a/sim/rl/engine.py b/sim/rl/engine.py
new file mode 100644
index 0000000..6d913f3
--- /dev/null
+++ b/sim/rl/engine.py
@@ -0,0 +1,220 @@
+import numpy as np
+import pandas as pd
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+from environment import BusinessLogicConstraints
+
+
+class BasePricingEngine(ABC):
+    """base interface for all pricing engines"""
+    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
+        self.c = constraints  # business constraints (catalogue size, price bounds, max adjustment)
+        self.rng = np.random.default_rng(seed)  # engine-local RNG, seeded for reproducibility
+        self.step_count = 0  # bumped by subclasses in compute_prices, zeroed by reset
+
+    @abstractmethod
+    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
+        """compute new prices given current state and observation from environment
+
+        args:
+            current_prices: current price vector [N]
+            observation: dict containing 'price', 'demand', and possibly interaction data
+
+        returns:
+            new_prices: updated price vector [N]
+        """
+        pass
+
+    def update(self, obs, reward, done, info) -> None:
+        """learning hook invoked with the outcome of an environment step
+
+        no-op by default so engines without a learning step stay instantiable; learning engines override it
+        """
+
+    def reset(self):
+        """reset engine state for new episode"""
+        self.step_count = 0
+
+
+class WildPricingEngine(BasePricingEngine):
+    """online elasticity pricing: EWMA log-log regression feeding an isoelastic markup rule"""
+    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
+        super().__init__(constraints, seed)
+        n = self.c.product_catelogue_size
+        # platform-side unit cost per product, drawn once and kept across resets
+        self.unit_cost = self.rng.uniform(8.0, 40.0, size=n).astype(np.float32)
+        self._init_estimator_state()
+        # tuning knobs
+        self.lr = 0.08             # step size toward the target price
+        self.ewma = 0.05           # EWMA weight of the newest sample
+        self.eps_explore = 0.03    # per-step probability of perturbing prices
+        self.explore_scale = 0.03  # std-dev of the multiplicative perturbation
+
+    def _init_estimator_state(self) -> None:
+        """(re)create the elasticity estimate and the EWMA regression moments"""
+        n = self.c.product_catelogue_size
+        self.e_hat = np.full((n,), -1.3, dtype=np.float32)  # starting elasticity guess
+        self.mu_logp = np.zeros(n, dtype=np.float32)
+        self.mu_logq = np.zeros(n, dtype=np.float32)
+        self.cov_pq = np.zeros(n, dtype=np.float32)
+        self.var_p = np.ones(n, dtype=np.float32)
+
+    def _safe_elasticity(self, e: np.ndarray) -> np.ndarray:
+        """clamp elasticities to [-5.0, -1.05] so e / (e + 1) stays finite and positive"""
+        return np.clip(e, -5.0, -1.05)
+
+    def reset(self):
+        """restart the episode: zero the step counter and rebuild the estimator state"""
+        super().reset()
+        self._init_estimator_state()
+
+    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
+        """count the step, read 'demand' from the observation (zeros if absent) and reprice"""
+        self.step_count += 1
+        fallback = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
+        return self._update_from_demand(current_prices, observation.get('demand', fallback))
+
+    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
+        """fold one (price, quantity) sample into the estimator and return guard-railed prices"""
+        log_price = np.log(np.clip(prices, 1e-3, None)).astype(np.float32)
+        log_qty = np.log(sold + 1.0).astype(np.float32)  # +1 keeps zero sales finite
+        w = self.ewma
+        keep = 1 - w
+        # deviations use the means from BEFORE this sample is folded in
+        dev_p = log_price - self.mu_logp
+        dev_q = log_qty - self.mu_logq
+        self.mu_logp = keep * self.mu_logp + w * log_price
+        self.mu_logq = keep * self.mu_logq + w * log_qty
+        self.cov_pq = keep * self.cov_pq + w * (dev_p * dev_q)
+        self.var_p = keep * self.var_p + w * (dev_p * dev_p + 1e-6)
+        slope = self.cov_pq / (self.var_p + 1e-6)  # regression slope of logq on logp
+        self.e_hat = self._safe_elasticity(0.9 * self.e_hat + 0.1 * slope)
+        # target price: unit cost times e / (e + 1)
+        target = self.unit_cost * (self.e_hat / (self.e_hat + 1.0))
+        proposal = (1 - self.lr) * prices + self.lr * target
+        # occasional multiplicative exploration noise
+        if self.rng.random() < self.eps_explore:
+            jitter = self.rng.normal(0.0, self.explore_scale, size=proposal.shape).astype(np.float32)
+            proposal = proposal * (1.0 + jitter)
+        # guardrails: cap the relative move per step, then clamp to system bounds
+        cap = self.c.max_price_adjustment
+        step_ratio = np.clip(proposal / (prices + 1e-6), 1 - cap, 1 + cap)
+        bounded = np.clip(prices * step_ratio, self.c.system_min_price, self.c.system_max_price)
+        return bounded.astype(np.float32)
+
+
+class StaticPricingEngine(BasePricingEngine):
+    """baseline engine that freezes the first price vector it is given for the whole episode"""
+    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
+        super().__init__(constraints, seed)
+        self.fixed_prices = None  # latched on the first compute_prices call
+
+    def reset(self):
+        super().reset()
+        self.fixed_prices = None  # the next episode latches a fresh snapshot
+
+    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
+        self.step_count += 1
+        frozen = current_prices.copy() if self.fixed_prices is None else self.fixed_prices
+        self.fixed_prices = frozen
+        return frozen.copy()
+
+
+class SimpleDemandEngine(BasePricingEngine):
+    """rule-based engine: nudge each price in the direction of that product's demand change"""
+    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
+        super().__init__(constraints, seed)
+        self.prev_demand = None  # demand vector seen on the previous step
+        self.lr = 0.05  # scales the relative demand change into a price change
+
+    def reset(self):
+        super().reset()
+        self.prev_demand = None
+
+    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
+        self.step_count += 1
+        demand = observation.get('demand', np.zeros(self.c.product_catelogue_size, dtype=np.float32))
+        previous = self.prev_demand
+        if previous is None:
+            self.prev_demand = demand.copy()
+            return current_prices.copy()
+        change = demand - previous
+        # relative change; the +1.0 keeps a zero baseline from dividing by zero
+        rel_move = self.lr * np.sign(change) * np.abs(change) / (np.abs(previous) + 1.0)
+        proposal = current_prices * (1.0 + rel_move)
+        self.prev_demand = demand.copy()
+        # guardrails: cap the per-step ratio, then clamp to system bounds
+        cap = self.c.max_price_adjustment
+        step_ratio = np.clip(proposal / (current_prices + 1e-6), 1 - cap, 1 + cap)
+        return np.clip(current_prices * step_ratio, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
+
+
+class RandomWalkEngine(BasePricingEngine):
+    """noise-driven engine: multiplicative gaussian steps with a weak pull back to the first prices seen"""
+    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
+        super().__init__(constraints, seed)
+        self.target_price = None  # anchor, latched on the first compute_prices call
+        self.volatility = 0.02  # std-dev of the per-step relative noise
+
+    def reset(self):
+        super().reset()
+        self.target_price = None
+
+    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
+        self.step_count += 1
+        if self.target_price is None:
+            self.target_price = current_prices.copy()
+        # one relative shock per product, plus 1% of the gap back to the anchor
+        shock = self.rng.normal(0.0, self.volatility, size=current_prices.shape).astype(np.float32)
+        pull = 0.01 * (self.target_price - current_prices)
+        proposal = current_prices * (1.0 + shock) + pull
+        # guardrails: cap the per-step ratio, then clamp to system bounds
+        cap = self.c.max_price_adjustment
+        step_ratio = np.clip(proposal / (current_prices + 1e-6), 1 - cap, 1 + cap)
+        bounded = np.clip(current_prices * step_ratio, self.c.system_min_price, self.c.system_max_price)
+        return bounded.astype(np.float32)
+
+
+class ThompsonSamplingEngine(BasePricingEngine):
+    """per-product bayesian bandit with beta posteriors over a small discrete grid of price levels"""
+    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
+        super().__init__(constraints, seed)
+        self.n_price_levels = 5
+        self._init_beliefs()
+
+    def _init_beliefs(self) -> None:
+        """set every (product, level) arm to alpha = beta = 1 and forget the grid and the last picks"""
+        shape = (self.c.product_catelogue_size, self.n_price_levels)
+        self.alpha = np.ones(shape, dtype=np.float32)
+        self.beta = np.ones(shape, dtype=np.float32)
+        self.price_grid = None  # built lazily from the first prices seen
+        self.last_actions = None  # level index chosen per product on the previous step
+
+    def reset(self):
+        super().reset()
+        self._init_beliefs()
+
+    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
+        """update beliefs from the demand observed for the last picks, then sample a level per product"""
+        self.step_count += 1
+        n_products = self.c.product_catelogue_size
+        if self.price_grid is None:
+            # rows = products, columns = evenly spaced levels from 70% to 130% of the first prices
+            self.price_grid = np.linspace(current_prices * 0.7, current_prices * 1.3, self.n_price_levels).T
+        demand = observation.get('demand', np.zeros(n_products, dtype=np.float32))
+        # credit or blame the arm pulled on the previous step, product by product
+        if self.last_actions is not None:
+            for i, arm in enumerate(self.last_actions):
+                if demand[i] > 0.5:
+                    self.alpha[i, arm] += demand[i]
+                else:
+                    self.beta[i, arm] += 1.0
+        # draw one sample per arm from its beta posterior and play the best arm for each product
+        chosen = np.zeros(n_products, dtype=int)
+        new_prices = np.zeros(n_products, dtype=np.float32)
+        for i in range(n_products):
+            draws = self.rng.beta(self.alpha[i], self.beta[i]).astype(np.float32)
+            chosen[i] = int(np.argmax(draws))
+            new_prices[i] = self.price_grid[i, chosen[i]]
+        self.last_actions = chosen
+        return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
diff --git a/sim/rl/environment.py b/sim/rl/environment.py
index 19f9ad4..fd725f8 100644
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -1,5 +1,7 @@
+from sys import intern
 import gymnasium as gym
 from gymnasium import spaces
+from matplotlib import interactive
 import numpy as np
 from dataclasses import dataclass
 import pandas as pd
@@ -24,7 +26,7 @@ class BusinessLogicConstraints():
     coi_sigmoid_temp: float = 1.25
     base_human_demand: float = 0.08
     base_agent_demand: float = 0.05
-    human_price_elasticity: float = -1.2
+    human_price_elasticity: float = -1.2 # assumptions here
     agent_price_elasticity: float = -0.6
     w_agent_loss: float = 1.0
     w_volatility: float = 5.0
@@ -35,31 +37,25 @@ class BusinessLogicConstraints():
 def _sigmoid(x: np.ndarray) -> np.ndarray:
     return 1.0 / (1.0 + np.exp(-x))
 
-
-def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
-    # baseline heuristic: high velocity + low conversion
-    v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index))
-    cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index))
-    total = session_df.get("total_interactions", pd.Series(0, index=session_df.index))
-    return (total >= 12) & (v >= 0.20) & (cr <= 0.01)
-
-
 class CommercePlatform:
-    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
-                 constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
-                 use_defense: bool = False):
+    """
+    This is just an extension of the state management for the environment, it does not implement anything dynamic just helps us simulate demand.
+    """
+    def __init__(self,
+                 product_catelogue_size: int,
+                 max_price: float,
+                 min_price: float,
+                 constraints: BusinessLogicConstraints):
         self.product_catelogue_size = product_catelogue_size
+        self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catelogue_size,))
         self.max_price = max_price
         self.min_price = min_price
         self.constraints = constraints
-        self.use_defense = use_defense
-        self.agent_detector = agent_detector
         self.simulation_history: List[Dict[str, Any]] = []
         self._rng = np.random.default_rng(constraints.seed)
-        self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
-        self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
         self._last_interaction_df: pd.DataFrame = pd.DataFrame()
 
+
     def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
         # ground truth purchase propensities
         p = np.clip(prices, self.min_price, self.max_price)
@@ -67,14 +63,19 @@ class CommercePlatform:
         human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
         agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
         return {
-            "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
-            "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
+            "human_purchase_prob": np.clip(human_prob, 0.0, 0.95),
+            "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95)
         }
 
-    def _session_markup_multiplier(self, signal_score: float) -> float:
-        # session-based COI markup based on demand signal expression
-        x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
-        return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])
+    @staticmethod
+    def _load_behavioral_profile(actor : str, demand_forcing):
+        """
+        Return a Markov chain of average transition weights estimated from our experiments' interaction data,
+        e.g. search -> view_item_price_binN: 0.7, view_item_price_binN -> add_to_cart: 0.2.
+        The weights must also be reweighted by demand_forcing (per-product purchase probabilities).
+        """
+        raise NotImplementedError("behavioral profile loading is not implemented yet")
+
 
     def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
         demand = self.setup_true_demand(base_prices)
@@ -84,94 +85,32 @@ class CommercePlatform:
         T = self.constraints.sessions_per_step
         n_agent_sessions = int(round(T * self.constraints.agent_share))
         n_human_sessions = T - n_agent_sessions
-
-        # human sessions: normal browse with possible purchase
-        for s in range(n_human_sessions):
-            session_id = f"h_{len(events)}_{s}"
-            k = int(self._rng.integers(1, 4))
-            prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
-            t = 0.0
-            inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
-            signal_score = 0.0
-            purchased_any = False
-
-            for i, pid in enumerate(prod_ids):
-                t += float(inter_times[i])
-                price_shown = float(base_prices[pid])
-                events.append({
-                    "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
-                    "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
-                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                })
-                signal_score += 1.0
-
-                if self._rng.random() < 0.35:
-                    t += float(inter_times[i + k])
-                    events.append({
-                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
-                        "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
-                        "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                    })
-                    signal_score += 2.0
-
-                if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
-                    t += float(inter_times[i + 2 * k])
-                    mult = self._session_markup_multiplier(signal_score)
-                    price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
-                    events.append({
-                        "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
-                        "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
-                        "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
-                    })
-                    purchased_any = True
-
-        # agent sessions: split recon/purchase to circumvent COI
         n_agent_ids = max(1, n_agent_sessions // 2)
-        for a in range(n_agent_ids):
-            agent_id = f"a_{a}"
-            recon_session_id = f"{agent_id}_recon"
-            t = 0.0
-            n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
-            inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
-            prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
-            recon_signal = 0.0
+        session_map = {
+            'humans': n_human_sessions,
+            'agents': n_agent_ids
+        }
+        pprob_map = {
+            'humans': human_pprob,
+            'agents': agent_pprob
+        }
+        joint_events = []
+        for actor, n_sessions in session_map.items():
+            bp = _load_behavioral_profile(actor, pprob_map[actor])
+            counter = 0
+            events = []
+            while counter < n_sessions:
+                session_events = []
+                while len(session_events) == 0 or session_events[-1]['action'] == 'checkout':
+                    interaction_event = bp.sample(self._rng)
+                    interaction_event['session_id'] = f'{actor}_{counter:06d}'
+                    # TODO any other assignments
+                    session_events.append(interaction_event)
+                events.extend(session_events)
+                counter += 1
+            joint_events.extend(events)
 
-            for i, pid in enumerate(prod_ids):
-                t += float(inter_times[i])
-                events.append({
-                    "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
-                    "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
-                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                })
-                recon_signal += 1.0
-
-            # clean purchase session with minimal interactions
-            if self._rng.random() < self.constraints.agent_purchase_probability:
-                purchase_session_id = f"{agent_id}_clean"
-                pid = int(self._rng.integers(0, self.product_catelogue_size))
-                t2 = 0.0
-                clean_signal = 0.0
-                t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
-                events.append({
-                    "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
-                    "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
-                    "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
-                })
-                clean_signal += 1.0
-
-                if self._rng.random() < float(agent_pprob[pid]):
-                    t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
-                    obs_mult = self._session_markup_multiplier(clean_signal)
-                    obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
-                    oracle_mult = self._session_markup_multiplier(recon_signal)  # oracle links recon->purchase
-                    oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
-                    events.append({
-                        "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
-                        "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
-                        "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
-                    })
-
-        return pd.DataFrame(events)
+        return pd.DataFrame(joint_events)
 
     def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
         if interaction_df.empty:
@@ -183,6 +122,7 @@ class CommercePlatform:
         return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
 
     def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
+        # TODO: adapt this
         if df.empty:
             return pd.DataFrame()
         g = df.groupby("session_id", sort=False)
@@ -208,73 +148,6 @@ class CommercePlatform:
             "is_agent": is_agent.astype(bool),
         }).reset_index()
 
-    def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
-        # proxy demand from weighted interaction events
-        if interaction_df.empty:
-            return np.zeros(self.product_catelogue_size, dtype=np.float32)
-        df = interaction_df
-        if exclude_sessions is not None:
-            bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
-            df = df[~df["session_id"].isin(bad_sessions)]
-        weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
-        w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
-        prod = df["product_id"].to_numpy(dtype=int)
-        q_hat = np.zeros(self.product_catelogue_size, dtype=float)
-        np.add.at(q_hat, prod, w)
-        return q_hat.astype(np.float32)
-
-    def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
-        interaction_df = self._simulate_sessions(prices)
-        self._last_interaction_df = interaction_df
-        session_df = self._session_feature_table(interaction_df)
-
-        predicted_agent_sessions = None
-        if (self.use_defense and self.agent_detector is not None and not session_df.empty):
-            predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
-
-        q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
-        q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
-            if predicted_agent_sessions is not None else q_hat_naive.copy()
-
-        true_human = np.zeros(self.product_catelogue_size, dtype=float)
-        true_agent = np.zeros(self.product_catelogue_size, dtype=float)
-        if not interaction_df.empty:
-            purchases = interaction_df[interaction_df["action"] == "purchase"]
-            if not purchases.empty:
-                for _, r in purchases.iterrows():
-                    if r["actor"] == "human":
-                        true_human[int(r["product_id"])] += 1.0
-                    else:
-                        true_agent[int(r["product_id"])] += 1.0
-
-        revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
-        revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
-        agent_loss = max(0.0, revenue_oracle - revenue_observed)
-
-        eps = 1e-6
-        internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
-        internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
-        interaction_features = self.compute_interaction_features(interaction_df)
-
-        summary = {
-            "prices": prices.copy(),
-            "interaction_df": interaction_df,
-            "session_df": session_df,
-            "q_hat_naive": q_hat_naive,
-            "q_hat_defended": q_hat_defended,
-            "true_human_demand": true_human.astype(np.float32),
-            "true_agent_purchases": true_agent.astype(np.float32),
-            "internal_error_naive": internal_error_naive.astype(np.float32),
-            "internal_error_defended": internal_error_def.astype(np.float32),
-            "interaction_features": interaction_features,
-            "revenue_observed": revenue_observed,
-            "revenue_oracle": revenue_oracle,
-            "agent_loss": agent_loss,
-            "predicted_agent_sessions": predicted_agent_sessions,
-        }
-        self.simulation_history.append(summary)
-        return summary
-
     def get_interaction_data(self) -> np.ndarray:
         if self._last_interaction_df.empty:
             return np.array([], dtype=object)
@@ -284,7 +157,7 @@ class CommercePlatform:
 class PHANTOMEnv(gym.Env):
     metadata = {"render_modes": []}
 
-    def __init__(self, use_defense: bool = False):
+    def __init__(self, constraints):
         super().__init__()
         self.constraints = BusinessLogicConstraints()
         self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
@@ -301,14 +174,13 @@ class PHANTOMEnv(gym.Env):
                     high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
                     dtype=np.float32),
             })
+            # TODO: define more features that we compute from the interaction data
         })
         self.commerce_platform = CommercePlatform(
             product_catelogue_size=self.constraints.product_catelogue_size,
             max_price=self.constraints.system_max_price,
             min_price=self.constraints.system_min_price,
-            constraints=self.constraints,
-            agent_detector=simple_agent_detector,
-            use_defense=use_defense)
+            constraints=self.constraints)
         self._rng = np.random.default_rng(self.constraints.seed)
         self.t = 0
         self._prev_prices: Optional[np.ndarray] = None
@@ -336,17 +208,13 @@ class PHANTOMEnv(gym.Env):
         new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
                            self.constraints.system_min_price,
                            self.constraints.system_max_price).astype(np.float32)
-        result = self.commerce_platform.run_pricing_simulation(new_prices)
-
-        if self.commerce_platform.use_defense:
-            demand_est = result["q_hat_defended"]
-            internal_err = result["internal_error_defended"]
-        else:
-            demand_est = result["q_hat_naive"]
-            internal_err = result["internal_error_naive"]
 
         self.state["elasticity"]["price"] = new_prices
-        self.state["elasticity"]["demand"] = demand_est
+        # TODO: use the commerce platform to simulate sessions
+        interactions_df = self.commerce_platform._simulate_sessions(new_prices)
+        result = self.commerce_platform.compute_interaction_features(interactions_df)
+        # TODO: implement COI computation to use in reward
+        COI = 0.0
 
         volatility = 0.0 if self._prev_prices is None else \
             float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
@@ -354,12 +222,13 @@ class PHANTOMEnv(gym.Env):
 
         revenue_observed = float(result["revenue_observed"])
         agent_loss = float(result["agent_loss"])
-        err_mean = float(np.mean(internal_err))
 
         reward = (revenue_observed
-                 - self.constraints.w_agent_loss * agent_loss
-                 - self.constraints.w_volatility * volatility
-                 - self.constraints.w_estimation_error * err_mean)
+                  - COI
+                  - self.constraints.w_agent_loss * agent_loss
+                  - self.constraints.w_volatility * volatility
+                  - self.constraints.w_estimation_error
+                  )
 
         terminated = self.t >= self.constraints.episode_length
         info = {
diff --git a/sim/rl/train.py b/sim/rl/train.py
new file mode 100644
index 0000000..41a87ab
--- /dev/null
+++ b/sim/rl/train.py
@@ -0,0 +1,149 @@
+import numpy as np
+import logging
+from pathlib import Path
+from typing import Dict, Type, Optional
+import pickle
+from torch import neg_
+from torch.utils.tensorboard import SummaryWriter
+from environment import PHANTOMEnv, FastTrainingConstraints, BusinessLogicConstraints
+from engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine,
+                   SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine)
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
+logger = logging.getLogger(__name__)
+
+
+
+"""
+Target training loop:
+have base prices p0 from env reset and run the env step, collect reward and metrics
+pass this to the pricing engine which computes the price action to take based on previous reward by learning
+the new action gets passed to the step
+so we alternate, step -> reward -> engine (produces price delta) -> step with price delta -> reward
+to make sure the reinforcement learning inside the engine can learn we need to have trajectory of prices
+CURRENT SOLUTION BELOW does not implement correct learning or updates.
+"""
+
+class EngineTrainer:
+    """wrapper to run pricing engines through episodes and collect metrics"""
+    def __init__(self, engine: BasePricingEngine, env: PHANTOMEnv,
+                 tb_writer: Optional[SummaryWriter] = None):
+        self.engine = engine
+        self.env = env
+        self.episode_metrics = []
+        self.tb_writer = tb_writer
+        self.global_step = 0
+
+    def train(self, n_episodes: int, seed: int = 42):
+
+        obs, _ = self.env.reset(seed=seed)
+        prices = None
+        for ep in range(n_episodes):
+            prices = self.engine.compute_prices(prices, obs
+            obs, reward, done, _, info = self.env.step(prices)
+            self.engine.update(obs, reward, done, info)
+        return self
+
+
+
+
+
+
+        return self.episode_metrics
+
+    def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict:
+        """evaluate trained engine"""
+        results = {k: [] for k in ['total_reward', 'revenue_observed', 'revenue_oracle',
+                                   'agent_loss', 'ux_volatility', 'look_to_book']}
+        for ep in range(n_episodes):
+            metrics = self.run_episode(seed=seed + ep)
+            for k in results:                results[k].append(metrics[k])
+        return {k: (np.mean(v), np.std(v)) for k, v in results.items()}
+
+
+def make_env(fast: bool = True):
+    constraints = FastTrainingConstraints() if fast else BusinessLogicConstraints()
+    return PHANTOMEnv(constraints=constraints)
+
+
+def train_engine(engine_cls: Type[BasePricingEngine], env: PHANTOMEnv,
+                n_episodes: int, seed: int = 42,
+                tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
+    constraints = env.constraints
+    engine = engine_cls(constraints=constraints, seed=seed)
+    trainer = EngineTrainer(engine, env, tb_writer=tb_writer)
+    trainer.train(n_episodes, seed=seed)
+    return trainer
+
+
+def save_trainer(trainer: EngineTrainer, path: Path):
+    """save engine state and metrics"""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with open(path, 'wb') as f:
+        pickle.dump({
+            'engine': trainer.engine,
+            'metrics': trainer.episode_metrics
+        }, f)
+    logger.info(f"Saved trainer to {path}")
+
+
+def load_trainer(path: Path, env: PHANTOMEnv,
+                 tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
+    """load the engine and metrics written by save_trainer and wrap them in a new EngineTrainer bound to `env`. SECURITY: pickle.load can execute arbitrary code - only open files you trust."""
+    with open(path, 'rb') as f:
+        data = pickle.load(f)  # expects the {'engine': ..., 'metrics': ...} dict produced by save_trainer
+    trainer = EngineTrainer(data['engine'], env, tb_writer=tb_writer)
+    trainer.episode_metrics = data['metrics']
+    return trainer
+
+
+if __name__ == "__main__":
+    base_dir = Path("./runs")
+    base_dir.mkdir(exist_ok=True)
+
+    engines = {
+        "Wild": WildPricingEngine,
+        "Static": StaticPricingEngine,
+#        "SimpleDemand": SimpleDemandEngine,
+        "RandomWalk": RandomWalkEngine,
+        "ThompsonSampling": ThompsonSamplingEngine,
+    }
+    defenses = [False, True]
+    n_train_episodes = 50
+    n_eval_episodes = 10
+    seed = 42
+    fast_mode = True
+
+    logger.info(f"Training config: {n_train_episodes} episodes per engine, fast_mode={fast_mode}")
+
+    trained_trainers = {}
+
+    for engine_name, engine_cls in engines.items():
+        for use_defense in defenses:
+            defense_label = "defense_on" if use_defense else "defense_off"
+            run_name = f"{engine_name}_{defense_label}"
+            log_dir = base_dir / run_name
+            log_dir.mkdir(parents=True, exist_ok=True)
+
+            logger.info(f"Training {engine_name} with defense={use_defense}")
+            logger.info(f"Log directory: {log_dir}")
+
+            env = make_env(fast=fast_mode)
+            tb_writer = SummaryWriter(log_dir=str(log_dir))
+            trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
+            tb_writer.close()
+
+            save_path = log_dir / "trainer.pkl"
+            save_trainer(trainer, save_path)
+
+            trained_trainers[run_name] = (trainer, env)
+
+    logger.info("Starting evaluation")
+
+    for run_name, (trainer, env) in trained_trainers.items():
+        logger.info(f"Evaluating {run_name}")
+        results = trainer.evaluate(n_episodes=n_eval_episodes, seed=seed + 1000)
+        for metric, (mean, std) in results.items():
+            logger.info(f"  {metric:20s}: {mean:10.2f} ± {std:6.2f}")
+
+    logger.info(f"Results saved to: {base_dir}")

From 6a06a8af4a01a898a1897cbc9b2560044e201ee1 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 17 Dec 2025 18:50:04 +0100
Subject: [PATCH 07/35] simple code cleanup

---
 sim/rl/engine.py | 7 +++++++
 sim/rl/train.py  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/sim/rl/engine.py b/sim/rl/engine.py
index 6d913f3..e0caca8 100644
--- a/sim/rl/engine.py
+++ b/sim/rl/engine.py
@@ -1,3 +1,4 @@
+from os import kill
 import numpy as np
 import pandas as pd
 from abc import ABC, abstractmethod
@@ -5,6 +6,11 @@ from typing import Dict, Any
 from environment import BusinessLogicConstraints
 
 
+"""
+An engine by default should have its own demand estimation mechanism, built from the observations it receives, which are the computed features.
+From these features we then follow the research structure of q -> p, using a mechanism that is testable and must be updatable.
+"""
+
 class BasePricingEngine(ABC):
     """base interface for all pricing engines"""
     def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
@@ -12,6 +18,7 @@ class BasePricingEngine(ABC):
         self.rng = np.random.default_rng(seed)
         self.step_count = 0
 
+
     @abstractmethod
     def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
         """compute new prices given current state and observation from environment
diff --git a/sim/rl/train.py b/sim/rl/train.py
index 41a87ab..ba257de 100644
--- a/sim/rl/train.py
+++ b/sim/rl/train.py
@@ -39,7 +39,7 @@ class EngineTrainer:
         obs, _ = self.env.reset(seed=seed)
         prices = None
         for ep in range(n_episodes):
-            prices = self.engine.compute_prices(prices, obs
+            prices = self.engine.compute_prices(prices, obs)
             obs, reward, done, _, info = self.env.step(prices)
             self.engine.update(obs, reward, done, info)
         return self

From ec4cf074e65478c32bea71a2f39e4eeda714a15f Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Fri, 9 Jan 2026 20:20:31 +0100
Subject: [PATCH 08/35] feature: MDP behavior mappers (unlinked)

---
 sim/rl/behavior_loader/loader.py |  63 ++++++++++++++
 sim/rl/behavior_loader/models.py | 137 +++++++++++++++++++++++++++++++
 2 files changed, 200 insertions(+)
 create mode 100644 sim/rl/behavior_loader/loader.py
 create mode 100644 sim/rl/behavior_loader/models.py

diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py
new file mode 100644
index 0000000..99a1541
--- /dev/null
+++ b/sim/rl/behavior_loader/loader.py
@@ -0,0 +1,63 @@
+import os
+from pydantic import BaseModel as Base
+import json
+
+class PayloadModel(Base):
+    sessionId: str
+    experimentId: str | None
+    eventName: str
+    page: str | None
+    productId: str | None
+    metadata: dict
+    storeMode: str
+    userAgent: str
+    ts: str
+
+class ValueModel(Base):
+    payload: PayloadModel
+    encoding: str
+    isPayloadNull: bool
+    schemaId: int
+    size: int
+
+class InteractionModel(Base):
+    partitionID: int
+    offset: int
+    timestamp: int
+    compression: str
+    isTransactional: bool
+    headers: list
+    key: dict
+    value: ValueModel
+
+class Loader:
+    """Load `<src_dir>/<entry>/int.json` for every entry of src_dir, validate each record as InteractionModel and drop /admin/ page events; raises ValueError when src_dir has no entries."""
+    def __init__(self, src_dir: str):
+        self.src_dir = src_dir
+        self.entries = os.listdir(src_dir)  # every directory entry is treated as one session folder
+        if not self.entries: raise ValueError("empty directory")
+        self.data = self._load_sessions()  # eager load: entry name -> filtered interactions
+
+    def _is_admin_page(self, interaction: InteractionModel) -> bool:
+        page = interaction.value.payload.page  # declared `str | None`, so it may be None
+        return bool(page) and page.startswith("/admin/")  # always a real bool, never None or ""
+
+    def _load_sessions(self) -> dict:
+        sessions = {}  # entry name -> list[InteractionModel] without admin-page events
+        for entry in self.entries:
+            with open(f"{self.src_dir}/{entry}/int.json", encoding="utf-8") as f:  # context manager closes the handle
+                ints = [InteractionModel(**i) for i in json.load(f)]
+            sessions[entry] = [i for i in ints if not self._is_admin_page(i)]
+        return sessions
+
+    def get_data(self) -> dict:
+        return self.data
+
+    def get_entries(self) -> tuple[list[str], int]:
+        return self.entries, len(self.entries)
+
+if __name__ == "__main__":
+    DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
+    loader = Loader(DIR)
+    _, n = loader.get_entries()
+    print(f"Loaded {n} sessions from {DIR}")
diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
new file mode 100644
index 0000000..f8e92b7
--- /dev/null
+++ b/sim/rl/behavior_loader/models.py
@@ -0,0 +1,137 @@
+from loader import Loader
+from collections import defaultdict
+from typing import Dict, List, Tuple, Set
+import numpy as np
+import graphviz
+
+DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
+
+class BehaviorModel:
+    def __init__(self, src_dir: str = DIR):
+        self.loader = Loader(src_dir)
+        self.data = self.loader.get_data()
+        self.entries, self.num_entries = self.loader.get_entries()
+        self.mdp = None  # filled in by build_MDP(); the query helpers below raise ValueError until then
+
+    def _state_repr(self, evt) -> str:
+        p = evt.value.payload
+        return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}"  # visualize_mdp reads the event name back with split('|')[2]; assumes page/productId contain no '|'
+
+    def _extract_sessions(self):
+        # turn each session's events into a timestamp-ordered list of state strings; sessions with fewer than 2 events are skipped
+        trajectories = []
+        for sid, evts in self.data.items():
+            if len(evts) < 2: continue
+            states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.timestamp)]
+            trajectories.append(states)
+        return trajectories
+
+    def _calc_transitions(self, trajectories: List[List[str]]) -> Tuple[Dict, Set]:
+        trans = defaultdict(lambda: defaultdict(int))  # trans[s][s_next] = number of observed s -> s_next steps
+        states = set()  # every state that appears in at least one counted step
+        for traj in trajectories:
+            for i in range(len(traj) - 1):
+                s, s_next = traj[i], traj[i+1]
+                trans[s][s_next] += 1
+                states.update([s, s_next])
+        return trans, states
+
+    def _calc_rewards(self, trajectories: List[List[str]]) -> Dict:
+        # per-state reward samples: relative position i/n of the state inside its trajectory (first state 0, last state (n-1)/n)
+        rwd = defaultdict(list)
+        for traj in trajectories:
+            n = len(traj)
+            for i, s in enumerate(traj):
+                rwd[s].append(i / n)
+        return rwd
+
+    def _normalize_trans(self, counts: Dict) -> Dict:
+        return {s: {s_n: cnt/sum(nxt.values()) for s_n, cnt in nxt.items()}  # divide each count by its row total to get probabilities
+                for s, nxt in counts.items()}
+
+    def build_MDP(self) -> Dict:
+        trajs = self._extract_sessions()
+        trans_cnt, states = self._calc_transitions(trajs)
+        trans_prob = self._normalize_trans(trans_cnt)
+        state_rwd = self._calc_rewards(trajs)
+        state_val = {s: np.mean(r) for s, r in state_rwd.items()}  # state value = mean of that state's reward samples
+
+        self.mdp = {
+            'states': sorted(list(states)),
+            'num_states': len(states),
+            'transitions': trans_prob,  # state -> {next_state: probability}
+            'state_values': state_val,
+            'state_rewards': state_rwd,  # raw per-state sample lists behind state_values
+            'trans_counts': trans_cnt,  # unnormalised counts behind transitions
+        }
+        return self.mdp
+
+    def transition_prob(self, s: str, s_next: str) -> float:
+        if not self.mdp: raise ValueError("build MDP first")
+        return self.mdp['transitions'].get(s, {}).get(s_next, 0.0)  # unseen state or pair -> 0.0
+
+    def state_value(self, s: str) -> float:
+        if not self.mdp: raise ValueError("build MDP first")
+        return self.mdp['state_values'].get(s, 0.0)  # unseen state -> 0.0
+
+    def sample_traj(self, start: str, max_len: int = 50) -> List[str]:
+        if not self.mdp: raise ValueError("build MDP first")
+        path = [start]  # result holds at most max_len + 1 states, start included
+        curr = start
+        for _ in range(max_len):
+            nxt = self.mdp['transitions'].get(curr, {})
+            if not nxt: break  # no recorded outgoing transition from curr
+            curr = np.random.choice(list(nxt.keys()), p=list(nxt.values()))  # draws from NumPy's global RNG; seed it externally for reproducible paths
+            path.append(curr)
+        return path
+
+def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False):
+    """visualize MDP as directed graph using graphviz, aggregated by event type"""
+    if not model.mdp: raise ValueError("build MDP first")
+
+    # aggregate transitions by event type
+    evt_trans = defaultdict(lambda: defaultdict(float))
+    for s, trans in model.mdp['transitions'].items():
+        evt_src = s.split('|')[2]
+        for s_next, prob in trans.items():
+            evt_dst = s_next.split('|')[2]
+            evt_trans[evt_src][evt_dst] += prob
+
+    # normalize aggregated transitions
+    for evt_src in evt_trans:
+        total = sum(evt_trans[evt_src].values())
+        if total > 0:
+            for evt_dst in evt_trans[evt_src]:
+                evt_trans[evt_src][evt_dst] /= total
+
+    g = graphviz.Digraph(format=fmt)
+    g.attr(rankdir='LR', size='30')
+    g.attr('node', shape='circle', width='1', height='1')
+
+    # collect all event types
+    events = set(evt_trans.keys())
+    for trans in evt_trans.values():
+        events.update(trans.keys())
+
+    # add nodes for each event type
+    for evt in events:
+        g.node(evt)
+
+    # add edges above threshold
+    for evt_src in evt_trans:
+        for evt_dst, prob in evt_trans[evt_src].items():
+            if prob > threshold:
+                g.edge(evt_src, evt_dst, label=f'{prob:.2f}')
+
+    g.render(output, view=view, cleanup=True)
+    print(f"Saved MDP graph to {output}.{fmt}")
+    return g
+
+if __name__ == "__main__":
+    model = BehaviorModel(DIR)
+    mdp = model.build_MDP()
+    print(f"Built MDP: {mdp['num_states']} states, {sum(len(t) for t in mdp['transitions'].values())} transitions")
+    if not mdp['states']:
+        print("No states found")
+        exit(1)
+    visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg")

From 131323ef56984229063ce1efca763615f51cb5d0 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sat, 10 Jan 2026 10:33:56 +0100
Subject: [PATCH 09/35] featuer: dot exporter

---
 sim/rl/behavior_loader/models.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index f8e92b7..6e4201e 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -85,7 +85,7 @@ class BehaviorModel:
             path.append(curr)
         return path
 
-def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False):
+def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False):
     """visualize MDP as directed graph using graphviz, aggregated by event type"""
     if not model.mdp: raise ValueError("build MDP first")
 
@@ -125,6 +125,13 @@ def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "
 
     g.render(output, view=view, cleanup=True)
     print(f"Saved MDP graph to {output}.{fmt}")
+
+    if export_dot:
+        dot_file = f"{output}.dot"
+        with open(dot_file, 'w') as f:
+            f.write(g.source)
+        print(f"Exported DOT source to {dot_file}")
+
     return g
 
 if __name__ == "__main__":
@@ -134,4 +141,4 @@ if __name__ == "__main__":
     if not mdp['states']:
         print("No states found")
         exit(1)
-    visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg")
+    visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg", export_dot=True)

From f9bf3de71eb691c9182c46f091ffe25b80d402de Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Sat, 10 Jan 2026 11:48:03 +0100
Subject: [PATCH 10/35] pdf rendering

---
 sim/rl/behavior_loader/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index 6e4201e..bce2429 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -141,4 +141,4 @@ if __name__ == "__main__":
     if not mdp['states']:
         print("No states found")
         exit(1)
-    visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg", export_dot=True)
+    visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="pdf", export_dot=True)

From 8b429b7a8e5a7c1e671c62afa34b013358be4208 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Mon, 12 Jan 2026 10:09:55 +0100
Subject: [PATCH 11/35] chore: refactor to better map end to end

---
 backend/provider/app.py                 | 71 ++++++++++++-------------
 experiments/procesing/pricers/simple.py | 59 +++++++++++++++++---
 experiments/procesing/steps/session.py  |  1 +
 lib/model_registry.py                   | 46 ++++++++++++++++
 web/src/app/api/pricing/route.ts        | 37 +++++++------
 5 files changed, 153 insertions(+), 61 deletions(-)

diff --git a/backend/provider/app.py b/backend/provider/app.py
index fb72a9d..6f9a55d 100644
--- a/backend/provider/app.py
+++ b/backend/provider/app.py
@@ -47,53 +47,52 @@ def health() -> dict:
 
 @app.get("/api/{mode}/price/{productId}", response_model=PriceResponse)
 def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)):
+    """
+    THIS is the fast lookup service (mechanism).
+    Priority: session-keyed price > global optimal price > base price
+    """
     product = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data[0]
     if not product: raise HTTPException(404, f"Product {productId} not found")
 
     metadata = product['metadata']
     base_price = metadata.get('base_price', 100.0)
 
-    # fetch pre-computed prices from registry
+    # PRIORITY 1: session-aware price (computed by Airflow worker)
+    if sessionId:
+        session_price = registry.get_session_price(sessionId, productId)
+        if session_price is not None:
+            return PriceResponse(
+                productId=productId,
+                price=session_price,
+                base_price=base_price,
+                markup=session_price/base_price,
+                elasticity=None,
+                model_version='session-aware'
+            )
+
+    # PRIORITY 2: global pre-computed prices (surge pricing)
     prices_df = registry.get_prices('latest')
-    elasticity_df = registry.get_elasticity('latest')
-
-    if prices_df is None:
-        # fallback: no pre-computed prices available
-        return PriceResponse(
-            productId=productId,
-            price=base_price,
-            base_price=base_price,
-            markup=1.0,
-            elasticity=None
-        )
-
-    # lookup pre-computed price for this product
-    product_price_row = prices_df[prices_df['productId'] == productId]
-    if product_price_row.empty:
-        # product not in pre-computed prices, fallback to base
-        return PriceResponse(
-            productId=productId,
-            price=base_price,
-            base_price=base_price,
-            markup=1.0,
-            elasticity=None
-        )
-
-    optimal_price = float(product_price_row['optimal_price'].iloc[0]) # TODO: use optimal_price everywhere as  aresult
-
-    # get elasticity if available
-    product_elasticity = None
-    if elasticity_df is not None:
-        product_elasticity_row = elasticity_df[elasticity_df['productId'] == productId]
-        if not product_elasticity_row.empty:
-            product_elasticity = float(product_elasticity_row['elasticity'].iloc[0])
+    if prices_df is not None:
+        product_price_row = prices_df[prices_df['productId'] == productId]
+        if not product_price_row.empty:
+            optimal_price = float(product_price_row['optimal_price'].iloc[0])
+            return PriceResponse(
+                productId=productId,
+                price=optimal_price,
+                base_price=base_price,
+                markup=optimal_price/base_price,
+                elasticity=None,
+                model_version='surge'
+            )
 
+    # PRIORITY 3: fallback to base price
     return PriceResponse(
         productId=productId,
-        price=optimal_price,
+        price=base_price,
         base_price=base_price,
-        markup=optimal_price/base_price,
-        elasticity=product_elasticity
+        markup=1.0,
+        elasticity=None,
+        model_version='base'
     )
 
 @app.get("/models")
diff --git a/experiments/procesing/pricers/simple.py b/experiments/procesing/pricers/simple.py
index 39be37a..6bdd1ca 100644
--- a/experiments/procesing/pricers/simple.py
+++ b/experiments/procesing/pricers/simple.py
@@ -3,6 +3,46 @@ import pandas as pd
 from procesing.pricers.base import PricingFunction
 
 
+def session_features_to_demand(session_features: pd.DataFrame) -> float:
+    """
+    Map the first row of session behavioral features to a scalar demand proxy.
+    THIS is the critical θ̂ → D transformation for rule-based pricing.
+
+    Logic:
+      - High velocity → agent behavior → price up (revenue recovery)
+      - High cart ratio → purchase intent → price up
+      - Low activity → score stays at the 1.0 baseline (no discount is applied inside this function)
+
+    Returns: demand proxy score in [1.0, 20.0], higher = more demand; 1.0 for an empty frame
+    """
+    if session_features.empty:
+        return 1.0
+
+    feat = session_features.iloc[0] if len(session_features) > 0 else {}  # only row 0 is read; the else-branch cannot run after the .empty guard above
+
+    velocity = feat.get('interaction_velocity', 0)
+    cart_ratio = feat.get('cart_to_view_ratio', 0)
+    item_views = feat.get('item_views', 0)
+    cart_adds = feat.get('cart_adds', 0)
+
+    # baseline demand (also the minimum possible return value)
+    demand = 1.0
+
+    # agent detection: high velocity → treat as high "demand" to price up
+    if velocity > 2.0:
+        demand += 10.0  # strong agent signal
+
+    # conversion intent: cart interaction → price up
+    if cart_ratio > 0.1 or cart_adds > 0:
+        demand += 5.0
+
+    # browsing depth: more than 3 views adds item_views itself, limited to 5.0
+    if item_views > 3:
+        demand += min(item_views, 5.0)
+
+    return min(demand, 20.0)  # cap at 20 (uncapped maximum would be 1 + 10 + 5 + 5 = 21)
+
+
 class StaticPricer(PricingFunction):
     """Static pricing: always return fixed base prices"""
 
@@ -67,21 +107,24 @@ class SimpleSurgePricer(PricingFunction):
         self.surge_multiplier = surge_multiplier
         self.discount_multiplier = discount_multiplier
 
-    def fit(self, market_data : pd.DataFrame):
+    def fit(self, market_data: pd.DataFrame):
         """Extract base prices from product catalog or historical averages"""
         self.base_prices = market_data['base_price'].to_numpy() if 'base_price' in market_data.columns else market_data['price'].values
-        self.demand_history = market_data['demand'].to_numpy() if 'demand' in market_data.columns else np.zeros_like(self.base_prices)
+        return self
 
-    def predict(self) -> np.ndarray:
+    def predict(self, state_space) -> np.ndarray:
         """
         Adjust prices based on current demand using surge rules.
-        state_space.demand: demand counts per product
-        state_space.prices: current prices (fallback if base_prices not set)
+        state_space.demand: demand proxy per product (from session features)
+        state_space.prices: base prices
         """
-        current_prices = self.base_prices if self.base_prices is not None else np.ones_like(demand_vector) * 99.99
-        demand = self.demand_history if self.demand_history is not None else np.zeros_like(current_prices)
-        new_prices = current_prices.copy()
+        demand = np.asarray(state_space.demand) if state_space and hasattr(state_space, 'demand') else np.array([0])
+        base = np.asarray(state_space.prices) if state_space and hasattr(state_space, 'prices') else self.base_prices
 
+        if base is None:
+            base = np.ones(len(demand)) * 99.99
+
+        new_prices = base.copy()
         high_mask = demand >= self.high_threshold
         new_prices[high_mask] *= self.surge_multiplier
 
diff --git a/experiments/procesing/steps/session.py b/experiments/procesing/steps/session.py
index 4b950aa..ec6f27c 100644
--- a/experiments/procesing/steps/session.py
+++ b/experiments/procesing/steps/session.py
@@ -135,6 +135,7 @@ class ExtractSessionFeaturesStep(BaseContextStep):
     Vectorized session feature extraction - replaces O(n^2) per-row loop.
     Input: interactions_df
     Output: session-level feature matrix
+    THIS is our main mapping from tau (trajectory) to a feature vector theta - we need to do this very well. This is what will go into demand estimation.
     """
 
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
diff --git a/lib/model_registry.py b/lib/model_registry.py
index 92d7934..e833a1a 100755
--- a/lib/model_registry.py
+++ b/lib/model_registry.py
@@ -178,3 +178,49 @@ class ModelRegistry:
             return True
         except:
             return False
+
+    def set_session_prices(self, session_id: str, prices: Dict[str, float], ttl: int = 1800):
+        """
+        Store prices for a specific session.
+        THIS is the write path for session-aware pricing.
+
+        Args:
+            session_id: session identifier
+            prices: dict of {productId: price}
+            ttl: time-to-live in seconds (default 30min)
+        """
+        if not prices:
+            return
+
+        key = f"session:{session_id}:prices"
+        # use Redis hash for O(1) lookup per product
+        self.redis_client.hset(key, mapping={k: str(v) for k, v in prices.items()})
+        self.redis_client.expire(key, ttl)
+
+    def get_session_price(self, session_id: str, product_id: str) -> Optional[float]:
+        """
+        Lookup price for (sessionId, productId).
+        THIS is the read path for fast provider lookup.
+
+        Returns: price or None if not found
+        """
+        key = f"session:{session_id}:prices"
+        price_str = self.redis_client.hget(key, product_id)
+
+        if price_str is None:
+            return None
+
+        return float(price_str.decode('utf-8') if isinstance(price_str, bytes) else price_str)
+
+    def get_session_all_prices(self, session_id: str) -> Dict[str, float]:
+        """Get all prices for a session."""
+        key = f"session:{session_id}:prices"
+        prices_raw = self.redis_client.hgetall(key)
+
+        if not prices_raw:
+            return {}
+
+        return {
+            (k.decode('utf-8') if isinstance(k, bytes) else k): float(v.decode('utf-8') if isinstance(v, bytes) else v)
+            for k, v in prices_raw.items()
+        }
diff --git a/web/src/app/api/pricing/route.ts b/web/src/app/api/pricing/route.ts
index 1aec75b..6532131 100644
--- a/web/src/app/api/pricing/route.ts
+++ b/web/src/app/api/pricing/route.ts
@@ -30,6 +30,8 @@ export async function GET(req: NextRequest) {
     const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001';
     try {
         const queryParams = new URLSearchParams();
+        // THIS is our entry point into the dynamic pricing, where we reference the context of the session and experiment and ask for a price to assign to the trajectory which is expressed.
+        // The whole pipeline gets triggered from here.
         if (sessionId) queryParams.append('sessionId', sessionId);
         if (experimentId) queryParams.append('experimentId', experimentId);
 
@@ -55,25 +57,26 @@ export async function GET(req: NextRequest) {
         price = Math.round(randomBase * 100) / 100;
     }
 
-    // log price to kafka for elasticity computation
+    // log price to kafka asynchronously (non-blocking)
     if (sessionId) {
         const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000';
-        try {
-            await fetch(`${backendUrl}/api/kafka/price-log`, {
-                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({
-                    productId,
-                    price,
-                    sessionId,
-                    experimentId: experimentId || undefined,
-                    storeMode,
-                    ts: timestamp,
-                }),
-            });
-        } catch (err) {
-            console.error('[price-log-error]', err);
-        }
+        // fire and forget - don't await to avoid blocking response
+        fetch(`${backendUrl}/api/kafka/price-log`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+                productId,
+                price,
+                sessionId,
+                experimentId: experimentId || undefined,
+                storeMode,
+                ts: timestamp,
+            }),
+        }).catch(err => {
+            if (process.env.NODE_ENV === 'development') {
+                console.error('[price-log-error]', err);
+            }
+        });
     }
 
     if (process.env.NODE_ENV === 'development') {

From 62a4008c29f94b753089de69ba84bfb6eea8a0cc Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Mon, 12 Jan 2026 13:37:48 +0100
Subject: [PATCH 12/35] feat: integration of pipeline hooks into testing

---
 Makefile                                      |  2 ++
 backend/server/app.py                         |  6 ++++-
 docker-compose.yml                            |  2 ++
 .../airflow/dags/surge_pricing_pipeline.py    | 24 +++++++++++++++----
 experiments/procesing/pricers/simple.py       |  3 ++-
 tests/e2e/helpers/kafka.ts                    |  4 ++--
 tests/e2e/playwright.config.ts                |  4 ++--
 tests/e2e/scenarios/session-aware.spec.ts     | 21 ++++++++++------
 tests/e2e/scenarios/surge-pricing.spec.ts     | 11 +++++++--
 9 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/Makefile b/Makefile
index 0c51bb3..879afb5 100644
--- a/Makefile
+++ b/Makefile
@@ -49,8 +49,10 @@ test.backend: $(VENV)
 test.e2e:
 	@cd tests/e2e && npm install
 	@cd tests/e2e && npx playwright install chromium
+	@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
 	@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
 	@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
+	@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
 	@cd tests/e2e && npm test
 
 .PHONY: test.all
diff --git a/backend/server/app.py b/backend/server/app.py
index d338408..f100811 100644
--- a/backend/server/app.py
+++ b/backend/server/app.py
@@ -198,12 +198,16 @@ def dump_logs(
             auto_offset_reset='earliest',
             enable_auto_commit=False,
             value_deserializer=lambda x: json.loads(x.decode('utf-8')),
-            consumer_timeout_ms=5000
+            consumer_timeout_ms=30000,
+            fetch_max_wait_ms=10000,
+            max_poll_records=1000
         )
 
         events = []
         for msg in consumer:
             events.append(msg.value)
+            if last_n and len(events) >= last_n * 2:
+                break
 
         consumer.close()
 
diff --git a/docker-compose.yml b/docker-compose.yml
index f72f415..561c393 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -144,6 +144,7 @@ services:
       - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
       - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
       - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
+      - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
       - KAFKA_HOST=kafka
       - KAFKA_PORT=29092
       - BACKEND_URL=http://backend:5000
@@ -180,6 +181,7 @@ services:
       - AIRFLOW__CORE__LOAD_EXAMPLES=false
       - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
       - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
+      - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
       - KAFKA_HOST=kafka
       - KAFKA_PORT=29092
       - BACKEND_URL=http://backend:5000
diff --git a/experiments/airflow/dags/surge_pricing_pipeline.py b/experiments/airflow/dags/surge_pricing_pipeline.py
index b1d7c61..1a3b3d0 100644
--- a/experiments/airflow/dags/surge_pricing_pipeline.py
+++ b/experiments/airflow/dags/surge_pricing_pipeline.py
@@ -120,15 +120,31 @@ def apply_surge_pricing(**kwargs):
     # rename demand_score to demand for pricer compatibility
     data = product_features.rename(columns={'demand_score': 'demand'})
 
+    high_thresh = dag_conf.get('high_threshold', 10)
+    low_thresh = dag_conf.get('low_threshold', 2)
+    surge_mult = dag_conf.get('surge_multiplier', 1.2)
+    discount_mult = dag_conf.get('discount_multiplier', 0.9)
+
+    logging.info(f"Surge pricing config: high_thresh={high_thresh}, low_thresh={low_thresh}, surge_mult={surge_mult}, discount_mult={discount_mult}")
+    logging.info(f"Demand stats: min={data['demand'].min():.2f}, max={data['demand'].max():.2f}, mean={data['demand'].mean():.2f}")
+    logging.info(f"Products with high demand (>={high_thresh}): {(data['demand'] >= high_thresh).sum()}")
+    logging.info(f"Products with low demand (<={low_thresh}): {(data['demand'] <= low_thresh).sum()}")
+
     surge_pricer = SimpleSurgePricer(
-        high_threshold=dag_conf.get('high_threshold', 10),
-        low_threshold=dag_conf.get('low_threshold', 2),
-        surge_multiplier=dag_conf.get('surge_multiplier', 1.2),
-        discount_multiplier=dag_conf.get('discount_multiplier', 0.9)
+        high_threshold=high_thresh,
+        low_threshold=low_thresh,
+        surge_multiplier=surge_mult,
+        discount_multiplier=discount_mult
     )
     surge_pricer.fit(data)
     data['optimal_price'] = surge_pricer.predict()
 
+    base_avg = data['base_price'].mean()
+    optimal_avg = data['optimal_price'].mean()
+    price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100
+
+    logging.info(f"Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%")
+
     prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={
         'price': 'current_price',
         'demand': 'demand_score'
diff --git a/experiments/procesing/pricers/simple.py b/experiments/procesing/pricers/simple.py
index 6bdd1ca..1a03f9f 100644
--- a/experiments/procesing/pricers/simple.py
+++ b/experiments/procesing/pricers/simple.py
@@ -124,7 +124,8 @@ class SimpleSurgePricer(PricingFunction):
         if base is None:
             base = np.ones(len(demand)) * 99.99
 
-        new_prices = base.copy()
+        # ensure float dtype to allow multiplication by float multipliers
+        new_prices = base.astype(np.float64).copy()
         high_mask = demand >= self.high_threshold
         new_prices[high_mask] *= self.surge_multiplier
 
diff --git a/tests/e2e/helpers/kafka.ts b/tests/e2e/helpers/kafka.ts
index c0a95dd..18b977d 100644
--- a/tests/e2e/helpers/kafka.ts
+++ b/tests/e2e/helpers/kafka.ts
@@ -9,8 +9,8 @@ interface InteractionEvent {
 const dumpKafkaTopic = async (backendUrl: string, topic: string) => {
   const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`);
   if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`);
-  const { messages = [] } = await resp.json();
-  return messages as any[];
+  const { data = [] } = await resp.json();
+  return data as any[];
 };
 
 export const waitForInteractionEvent = async (
diff --git a/tests/e2e/playwright.config.ts b/tests/e2e/playwright.config.ts
index 54a5561..dc3c815 100644
--- a/tests/e2e/playwright.config.ts
+++ b/tests/e2e/playwright.config.ts
@@ -5,14 +5,14 @@ export default defineConfig({
   fullyParallel: true,
   forbidOnly: !!process.env.CI,
   retries: 0,
-  workers: 5,
+  workers: 1,
   reporter: 'list',
   use: {
     baseURL: process.env.WEB_URL || 'http://localhost:3000',
     trace: 'retain-on-failure',
     screenshot: 'only-on-failure',
   },
-  timeout: 60000,
+  timeout: 180000,
   expect: {
     timeout: 10000,
   },
diff --git a/tests/e2e/scenarios/session-aware.spec.ts b/tests/e2e/scenarios/session-aware.spec.ts
index b204984..5c27747 100644
--- a/tests/e2e/scenarios/session-aware.spec.ts
+++ b/tests/e2e/scenarios/session-aware.spec.ts
@@ -9,6 +9,7 @@ import {
   addToCart,
 } from '../helpers/interactions';
 import { getSessionEvents } from '../helpers/kafka';
+import { runSessionPricing } from '../helpers/airflow';
 
 test.describe('SessionAwarePricer E2E', () => {
   const STORE_TYPE = 'hotel';
@@ -23,6 +24,9 @@ test.describe('SessionAwarePricer E2E', () => {
     await page.waitForTimeout(1500);
 
     const productId2 = await humanLikeViewProduct(page, STORE_TYPE);
+
+    await runSessionPricing(STORE_TYPE);
+
     const secondPrice = await getPriceFromDOM(page);
     expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
 
@@ -40,11 +44,13 @@ test.describe('SessionAwarePricer E2E', () => {
     await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE);
     expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
 
-    await page.waitForTimeout(2500);
+    await page.waitForTimeout(1000);
 
     const events = await getSessionEvents(backendUrl, sessionId);
     expect(events.length).toBeGreaterThanOrEqual(8);
 
+    await runSessionPricing(STORE_TYPE);
+
     await page.goto(`/products/${productId}`);
     await page.waitForLoadState('networkidle');
     const agentPrice = await getPriceFromDOM(page);
@@ -59,14 +65,12 @@ test.describe('SessionAwarePricer E2E', () => {
     const productId = await viewProductViaFlow(page, STORE_TYPE);
     const baselinePrice = await getPriceFromDOM(page);
 
-    const startTime = Date.now();
     await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE);
-    const duration = (Date.now() - startTime) / 1000;
 
-    const eventsPerSec = 10 / duration;
-    expect(eventsPerSec).toBeGreaterThan(2.0);
+    const events = await getSessionEvents(backendUrl, sessionId);
+    expect(events.length).toBeGreaterThanOrEqual(10);
 
-    await page.waitForTimeout(2000);
+    await runSessionPricing(STORE_TYPE);
 
     await page.goto(`/products/${productId}`);
     await page.waitForLoadState('networkidle');
@@ -105,8 +109,11 @@ test.describe('SessionAwarePricer E2E', () => {
 
     await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE);
 
-    await page.waitForTimeout(1500);
+    await page.waitForTimeout(1000);
     await humanLikeViewProduct(page, STORE_TYPE);
+
+    await runSessionPricing(STORE_TYPE);
+
     const finalPrice = await getPriceFromDOM(page);
 
     expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3);
diff --git a/tests/e2e/scenarios/surge-pricing.spec.ts b/tests/e2e/scenarios/surge-pricing.spec.ts
index e3e2f8d..26d29d3 100644
--- a/tests/e2e/scenarios/surge-pricing.spec.ts
+++ b/tests/e2e/scenarios/surge-pricing.spec.ts
@@ -7,6 +7,7 @@ import {
   verifySessionConsistency,
 } from '../helpers/interactions';
 import { waitForInteractionEvent, countProductViews } from '../helpers/kafka';
+import { runSurgePricing } from '../helpers/airflow';
 
 test.describe('SimpleSurgePricer E2E', () => {
   const STORE_TYPE = 'hotel';
@@ -29,7 +30,7 @@ test.describe('SimpleSurgePricer E2E', () => {
 
     await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE);
 
-    await page.waitForTimeout(2000);
+    await page.waitForTimeout(1000);
 
     const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page');
     expect(evt).not.toBeNull();
@@ -37,6 +38,8 @@ test.describe('SimpleSurgePricer E2E', () => {
     const viewCount = await countProductViews(backendUrl, productId);
     expect(viewCount).toBeGreaterThanOrEqual(5);
 
+    await runSurgePricing(STORE_TYPE, 3, 1);
+
     await page.goto(`/products/${productId}`);
     await page.waitForLoadState('networkidle');
     const surgedPrice = await getPriceFromDOM(page);
@@ -72,7 +75,9 @@ test.describe('SimpleSurgePricer E2E', () => {
 
     await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE);
 
-    await page.waitForTimeout(1500);
+    await page.waitForTimeout(1000);
+
+    await runSurgePricing(STORE_TYPE, 3, 1);
 
     await page.goto(`/products/${productId}`);
     await page.waitForLoadState('networkidle');
@@ -81,6 +86,8 @@ test.describe('SimpleSurgePricer E2E', () => {
 
     await page.waitForTimeout(12000);
 
+    await runSurgePricing(STORE_TYPE, 3, 1);
+
     await page.goto(`/products/${productId}`);
     await page.waitForLoadState('networkidle');
     const decayedPrice = await getPriceFromDOM(page);

From e89cb263d49375f0b0a628810d41c86617ae5386 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Mon, 12 Jan 2026 20:59:09 +0100
Subject: [PATCH 13/35] planning

---
 .../airflow/dags/surge_pricing_factory.py     | 10 +++++++
 experiments/procesing/pricers/base.py         | 29 +++++++++----------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/experiments/airflow/dags/surge_pricing_factory.py b/experiments/airflow/dags/surge_pricing_factory.py
index a886d5b..b61e65c 100644
--- a/experiments/airflow/dags/surge_pricing_factory.py
+++ b/experiments/airflow/dags/surge_pricing_factory.py
@@ -1,3 +1,4 @@
+from pandas.core.algorithms import factorize_array
 from airflow import DAG
 from airflow.operators.python import PythonOperator
 from airflow.utils.dates import days_ago
@@ -208,3 +209,12 @@ def create_surge_pricing_dag(store_mode: str) -> DAG:
 # instantiate DAGs for Airflow to discover
 dag_airline = create_surge_pricing_dag('airline')
 dag_hotel = create_surge_pricing_dag('hotel')
+
+# TODO: Refactor this factory from a surge pricing factory to a general pricing factory
+# We will do this by passing a pricing strategy class to the factory, since the generic pipeline is:
+# take all interaction data, group by sessionId and assign a new price vector to each session
+# in the grouping we get a subset of the interactions per sessionId and we can map that to some Features
+# we define a custom _get_features(interactions .) method in the strategy class
+# we then run only the inference which is the .predict(trajectory) per-session which will give us a new price vector
+# this we then publish for each sessionId group
+# this might include now deleting most of the pricers we have defined and starting with a super simple surge-pricing algorithm that is predict-only (no fit). This we can then test end-to-end and observe changes to prices according to a desired strategy - we have to define this one as a very short-term strategy because we run sessions that take only a few minutes.
diff --git a/experiments/procesing/pricers/base.py b/experiments/procesing/pricers/base.py
index 6569556..ecaabed 100644
--- a/experiments/procesing/pricers/base.py
+++ b/experiments/procesing/pricers/base.py
@@ -7,15 +7,6 @@ import pandas as pd
 class PricingFunction(ABC):
     """
     Abstract base for pricing functions.
-
-    Defines mapping: f(Q_t, P_t, S_t, H_t) -> P_{t+1}
-
-    Where:
-        Q_t ∈ R^n: demand vector at time t
-        P_t ∈ R^n: price vector at time t
-        S_t: session features (behavioral signals, interactions)
-        H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory
-
     Objective:
         maximize E[R_T] = E[Σ P_t^T · Q_t]
         subject to:
@@ -28,10 +19,10 @@ class PricingFunction(ABC):
     def fit(self, *kwargs):
         """
         Offline training on historical data.
+        This is where we can think about some maximization of expected revenue
+        over historical trajectories to learn parameters of the pricing function.
+        (This, however, we cover more on the RL side of things)
 
-        Args:
-            historical_data: DataFrame with elasticity, prices, demand signals
-            **kwargs: additional training parameters
         """
         pass
 
@@ -39,12 +30,18 @@ class PricingFunction(ABC):
     def predict(self, *kwargs) -> np.ndarray:
         """
         Generate optimal prices given current state.
+        This is an abstract method that transitions from τ -> P*
+        which is the mapping from the trajectory to optimal prices under
+        some subset of session grouping (so, per sessionId)
+        """
+        pass
 
-        Args:
-            state_space: StateSpace object containing Q_t, P_t, S_t, H_t
-
+    @abstractmethod
+    def _get_features(self, *kwargs) -> np.ndarray:
+        """
+        Extract features from trajectory for pricing decision.
         Returns:
-            P_{t+1}: price vector in R^n
+            np.ndarray of shape (n_products, n_features)
         """
         pass
 

From 3c141a4b6c5d13bc4078807c381082fbc7ce625d Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Mon, 12 Jan 2026 22:33:47 +0100
Subject: [PATCH 14/35] chore: better test consistency before agent

---
 docker-compose.yml                            | 20 ++++++++--
 experiments/procesing/pricers/elasticity.py   | 10 +++++
 .../procesing/pricers/session_aware.py        | 39 +++++++++++++++++++
 experiments/procesing/pricers/simple.py       | 23 +++++++++++
 4 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 561c393..ba2e8a3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -112,11 +112,14 @@ services:
     depends_on:
       - postgres
     environment:
-      - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
       - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
       - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
       - AIRFLOW__CORE__LOAD_EXAMPLES=false
       - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
+      - AIRFLOW__CORE__PARALLELISM=16
+      - AIRFLOW__CORE__DAG_CONCURRENCY=8
+      - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
       - _AIRFLOW_DB_MIGRATE=true
       - _AIRFLOW_WWW_USER_CREATE=true
       - _AIRFLOW_WWW_USER_USERNAME=admin
@@ -136,12 +139,17 @@ services:
       - airflow-init
       - redis
     environment:
-      - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
       - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
       - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
       - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
       - AIRFLOW__CORE__LOAD_EXAMPLES=false
       - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
+      - AIRFLOW__CORE__PARALLELISM=16
+      - AIRFLOW__CORE__DAG_CONCURRENCY=8
+      - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
+      - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
+      - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
       - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
       - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
       - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
@@ -174,12 +182,18 @@ services:
       redis:
         condition: service_started
     environment:
-      - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
+      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
       - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
       - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
       - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
       - AIRFLOW__CORE__LOAD_EXAMPLES=false
       - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
+      - AIRFLOW__CORE__PARALLELISM=16
+      - AIRFLOW__CORE__DAG_CONCURRENCY=8
+      - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
+      - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
+      - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
+      - AIRFLOW__SCHEDULER__PARSING_PROCESSES=2
       - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
       - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
       - KAFKA_HOST=kafka
diff --git a/experiments/procesing/pricers/elasticity.py b/experiments/procesing/pricers/elasticity.py
index b203159..3ce3b42 100644
--- a/experiments/procesing/pricers/elasticity.py
+++ b/experiments/procesing/pricers/elasticity.py
@@ -57,3 +57,13 @@ class ElasticityBasedPricer(PricingFunction):
         # enforce bounds
         prices = np.clip(prices, self.price_floor, self.price_ceil)
         return prices
+
+    def _get_features(self, state_space=None) -> np.ndarray:
+        """Extract elasticity, demand, and demand deviation for each product"""
+        if state_space is None or self.elasticity is None:
+            n = len(self.elasticity) if self.elasticity is not None else 0
+            return np.zeros((n, 3))
+
+        demand = np.asarray(state_space.demand)
+        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
+        return np.column_stack([self.elasticity, demand, demand_dev])
diff --git a/experiments/procesing/pricers/session_aware.py b/experiments/procesing/pricers/session_aware.py
index 40343a7..dbc859f 100644
--- a/experiments/procesing/pricers/session_aware.py
+++ b/experiments/procesing/pricers/session_aware.py
@@ -107,6 +107,36 @@ class SessionAwarePricer(PricingFunction):
 
         return prices
 
+    def _get_features(self, state_space=None) -> np.ndarray:
+        """Extract elasticity, demand, and session features"""
+        if state_space is None or self.elasticity is None:
+            n = len(self.elasticity) if self.elasticity is not None else 0
+            return np.zeros((n, 5))
+
+        demand = np.asarray(state_space.demand)
+        n_products = len(demand)
+
+        # extract session features
+        velocity = 0.0
+        view_depth = 0.0
+        cart_to_view = 0.0
+
+        if not state_space.session_features.empty:
+            sf = state_space.session_features.iloc[0]
+            velocity = sf.get('interaction_velocity', 0.0)
+            view_depth = sf.get('product_view_depth', 0.0)
+            cart_to_view = sf.get('cart_to_view_ratio', 0.0)
+
+        # broadcast session features to all products
+        features = np.column_stack([
+            self.elasticity,
+            demand,
+            np.full(n_products, velocity),
+            np.full(n_products, view_depth),
+            np.full(n_products, cart_to_view)
+        ])
+        return features
+
 
 class ProductSpecificSessionPricer(PricingFunction):
     """
@@ -170,3 +200,12 @@ class ProductSpecificSessionPricer(PricingFunction):
 
         prices = np.clip(base_prices, self.price_floor, self.price_ceil)
         return prices
+
+    def _get_features(self, state_space=None) -> np.ndarray:
+        """Extract elasticity and demand features for product-specific pricing"""
+        if state_space is None or self.elasticity is None:
+            n = len(self.elasticity) if self.elasticity is not None else 0
+            return np.zeros((n, 2))
+
+        demand = np.asarray(state_space.demand)
+        return np.column_stack([self.elasticity, demand])
diff --git a/experiments/procesing/pricers/simple.py b/experiments/procesing/pricers/simple.py
index 1a03f9f..d7fa699 100644
--- a/experiments/procesing/pricers/simple.py
+++ b/experiments/procesing/pricers/simple.py
@@ -65,6 +65,11 @@ class StaticPricer(PricingFunction):
             raise ValueError("Must call fit() or provide base_prices in constructor")
         return self.base_prices.copy()
 
+    def _get_features(self, state_space=None) -> np.ndarray:
+        """Static pricer uses no features, returns empty array"""
+        n = len(self.base_prices) if self.base_prices is not None else 0
+        return np.zeros((n, 0))
+
 
 class RandomPricer(PricingFunction):
     """Random pricing within bounds (for baseline comparison)"""
@@ -87,6 +92,11 @@ class RandomPricer(PricingFunction):
             self.n_products = len(state_space.demand)
         return self.rng.uniform(self.price_min, self.price_max, size=self.n_products)
 
+    def _get_features(self, state_space=None) -> np.ndarray:
+        """Random pricer uses no features"""
+        n = self.n_products if self.n_products else 0
+        return np.zeros((n, 0))
+
 
 class SimpleSurgePricer(PricingFunction):
     """
@@ -133,3 +143,16 @@ class SimpleSurgePricer(PricingFunction):
         new_prices[low_mask] *= self.discount_multiplier
 
         return new_prices
+
+    def _get_features(self, state_space=None) -> np.ndarray:
+        """Extract demand and base price features for each product"""
+        if state_space is None:
+            n = len(self.base_prices) if self.base_prices is not None else 0
+            return np.zeros((n, 2))
+
+        demand = np.asarray(state_space.demand) if hasattr(state_space, 'demand') else np.array([0])
+        base = np.asarray(state_space.prices) if hasattr(state_space, 'prices') else self.base_prices
+        if base is None:
+            base = np.ones(len(demand)) * 99.99
+
+        return np.column_stack([demand, base])

From 4c368d48f2a489595a9c8bc0375f348568305782 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 15:05:33 +0100
Subject: [PATCH 15/35] chore: fixing visual bugs in cart

---
 web/src/app/cart/page.tsx | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/web/src/app/cart/page.tsx b/web/src/app/cart/page.tsx
index 30ac3f2..dbcb30b 100644
--- a/web/src/app/cart/page.tsx
+++ b/web/src/app/cart/page.tsx
@@ -32,7 +32,8 @@ export default function CartPage() {
                     {itemCount > 0 && (
                         <button
                             onClick={clearCart}
-                            className="text-sm text-red-600 hover:underline"
+                            className="text-sm hover:underline"
+                            style={{ color: 'var(--accent-warning)' }}
                         >
                             Clear cart
                         </button>
@@ -42,7 +43,7 @@ export default function CartPage() {
                 {itemCount === 0 ? (
                     <div className="text-center py-12">
                         <p className="text-gray-500 mb-4">Your cart is empty</p>
-                        <a href="/" className="text-blue-600 hover:underline">Browse our selection</a>
+                        <a href="/" className="hover:underline" style={{ color: 'var(--text-accent)' }}>Browse our selection</a>
                     </div>
                 ) : (
                     <>
@@ -54,15 +55,11 @@ export default function CartPage() {
                                 >
                                     <div className="flex-1">
                                         <div className="flex items-center gap-2 mb-1">
-                                            <span className="px-2 py-0.5 text-xs font-medium rounded bg-blue-100 text-blue-800">
-                                                {item.type}
-                                            </span>
                                             <h3 className="font-semibold">{item.name}</h3>
                                         </div>
 
                                         {item.type === 'hotel' && (
                                             <div className="text-sm text-gray-600">
-                                                <p>{String(item.metadata.roomType)}</p>
                                                 <p>{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}</p>
                                                 <p>{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}</p>
                                             </div>
@@ -81,7 +78,8 @@ export default function CartPage() {
                                         <p className="text-xl font-bold mb-2">${item.price}</p>
                                         <button
                                             onClick={() => handleRemove(item.id, item.type)}
-                                            className="text-sm text-red-600 hover:underline"
+                                            className="text-sm hover:underline"
+                                            style={{ color: 'var(--accent-warning)' }}
                                         >
                                             Remove
                                         </button>
@@ -100,7 +98,7 @@ export default function CartPage() {
                                     dispatchInteraction('checkout_start', undefined, { total, itemCount });
                                     window.location.href = '/checkout';
                                 }}
-                                className="w-full py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors"
+                                className="btn-primary w-full"
                             >
                                 Proceed to Checkout
                             </button>

From 61dd621532fbe91eb5afccf01fbb87488e55978a Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 15:09:52 +0100
Subject: [PATCH 16/35] chore: styling and title updates

---
 web/src/app/globals.css              | 3 +++
 web/src/app/layout.tsx               | 4 ++--
 web/src/components/ui/Navigation.tsx | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/web/src/app/globals.css b/web/src/app/globals.css
index 4a5b0c9..457b974 100644
--- a/web/src/app/globals.css
+++ b/web/src/app/globals.css
@@ -8,6 +8,9 @@
   --bg-secondary: #f5f5f5;
   --text-primary: #333333;
   --text-secondary: #666666;
+  --accent-primary: #007aff;
+  --accent-primary-hover: #0051d5;
+  --accent-primary-light: #e6f2ff;
   --spacing-sm: 8px;
   --spacing-md: 16px;
   --spacing-lg: 32px;
diff --git a/web/src/app/layout.tsx b/web/src/app/layout.tsx
index e9f9b63..5ff49ae 100644
--- a/web/src/app/layout.tsx
+++ b/web/src/app/layout.tsx
@@ -15,8 +15,8 @@ const geistMono = Geist_Mono({
 });
 
 export const metadata: Metadata = {
-  title: "Create Next App",
-  description: "Generated by create next app",
+  title: "Travel Booking Platform",
+  description: "Book flights and hotels with dynamic pricing",
 };
 
 export default function RootLayout({
diff --git a/web/src/components/ui/Navigation.tsx b/web/src/components/ui/Navigation.tsx
index 9d9d4cf..6f0ecbb 100644
--- a/web/src/components/ui/Navigation.tsx
+++ b/web/src/components/ui/Navigation.tsx
@@ -20,7 +20,7 @@ const NavLink = ({ href, children }: { href: string; children: React.ReactNode }
       href={href}
       className={`px-4 py-2 rounded-md transition-colors ${
         isActive
-          ? 'bg-[var(--accent-primary)] font-semibold'
+          ? 'bg-[var(--accent-primary)] text-white font-semibold'
           : 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]'
       }`}
     >

From eb9506038096497a0377636ce082f1be2f9e6840 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Alves=20R=C3=B6sel?=
 <60182044+velocitatem@users.noreply.github.com>
Date: Tue, 13 Jan 2026 15:35:27 +0100
Subject: [PATCH 17/35] Pre run web refactors (#43)

* chore: refactor date utilities

* feat: improve images of hotel rooms

* fix: adding date utils
---
 web/src/components/feats/hotel/HotelCard.tsx  |  5 +-
 .../components/feats/hotel/HotelDetails.tsx   |  5 +-
 web/src/lib/airline-utils.ts                  | 24 +--------
 web/src/lib/date-utils.ts                     | 23 ++++++++
 web/src/lib/hotel-utils.ts                    | 52 +++++++++++--------
 5 files changed, 60 insertions(+), 49 deletions(-)
 create mode 100644 web/src/lib/date-utils.ts

diff --git a/web/src/components/feats/hotel/HotelCard.tsx b/web/src/components/feats/hotel/HotelCard.tsx
index 5bf234d..847e1b2 100644
--- a/web/src/components/feats/hotel/HotelCard.tsx
+++ b/web/src/components/feats/hotel/HotelCard.tsx
@@ -2,6 +2,7 @@
 
 import type { EventName } from '@/lib/events';
 import type { Hotel } from '@/lib/hotel-utils';
+import { getHotelImageUrl } from '@/lib/hotel-utils';
 import { useHoverTracking } from '@/hooks/useHoverTracking';
 import PriceDisplay from '@/components/ui/PriceDisplay';
 
@@ -47,8 +48,6 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
         window.location.href = `/hotel/products/${hotel.id}`;
     };
 
-    const imageUrl = `https://images.unsplash.com/photo-1551882547-ff40c63fe5fa?w=400&h=300&fit=crop`;
-
     return (
         <div
             className="hotel-card cursor-pointer"
@@ -56,7 +55,7 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
         >
             <div className="hotel-image relative overflow-hidden">
                 <img
-                    src={imageUrl}
+                    src={getHotelImageUrl(hotel.id, { w: 400, h: 300 })}
                     alt={hotel.name}
                     className="w-full h-full object-cover"
                     onError={(e) => {
diff --git a/web/src/components/feats/hotel/HotelDetails.tsx b/web/src/components/feats/hotel/HotelDetails.tsx
index 6cdbbdd..030769f 100644
--- a/web/src/components/feats/hotel/HotelDetails.tsx
+++ b/web/src/components/feats/hotel/HotelDetails.tsx
@@ -2,6 +2,7 @@
 
 import { useState, useEffect } from 'react';
 import type { Hotel } from '@/lib/hotel-utils';
+import { getHotelImageUrl } from '@/lib/hotel-utils';
 import PriceDisplay from '@/components/ui/PriceDisplay';
 
 interface HotelDetailsProps {
@@ -43,13 +44,11 @@ const PriceTotalDisplay = ({ productId, nights }: { productId: string; nights: n
 };
 
 export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) {
-  const imageUrl = `https://images.unsplash.com/photo-1566073771259-6a8506099945?w=800&h=600&fit=crop`;
-
   return (
     <div className="w-full flex flex-col lg:flex-row gap-12 py-8">
       <div className="w-full lg:w-1/2 rounded-lg aspect-[4/3] overflow-hidden shrink-0">
         <img
-          src={imageUrl}
+          src={getHotelImageUrl(product.id, { w: 800, h: 600 })}
           alt={product.name}
           className="w-full h-full object-cover"
           onError={(e) => {
diff --git a/web/src/lib/airline-utils.ts b/web/src/lib/airline-utils.ts
index 74a1916..b801e14 100644
--- a/web/src/lib/airline-utils.ts
+++ b/web/src/lib/airline-utils.ts
@@ -31,7 +31,7 @@ export interface Flight {
   availability: number;
 }
 
-const EPOCH = new Date(0);
+import { dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
 
 export const transformProduct = (p: AirlineProduct): Flight => {
   const { id, flight_type, date_index, metadata, availability } = p;
@@ -52,24 +52,4 @@ export const transformProduct = (p: AirlineProduct): Flight => {
   };
 };
 
-// convert date string to days from today
-export const dateToDaysFromToday = (dateStr: string): number => {
-  const target = new Date(dateStr);
-  target.setHours(0, 0, 0, 0);
-  const today = new Date();
-  today.setHours(0, 0, 0, 0);
-  return Math.floor((target.getTime() - today.getTime()) / 86400000);
-};
-
-// convert date string to date_index (days since epoch)
-export const dateToIndex = (dateStr: string): number => {
-  const d = new Date(dateStr);
-  return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
-};
-
-// get current date_index
-export const todayIndex = (): number => {
-  const now = new Date();
-  now.setHours(0, 0, 0, 0);
-  return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
-};
+export { dateToDaysFromToday, dateToIndex, todayIndex };
diff --git a/web/src/lib/date-utils.ts b/web/src/lib/date-utils.ts
new file mode 100644
index 0000000..bad1a90
--- /dev/null
+++ b/web/src/lib/date-utils.ts
@@ -0,0 +1,23 @@
+const EPOCH = new Date(0);
+const MS_PER_DAY = 86400000;
+
+export const dateToDaysFromToday = (dateStr: string): number => {
+  const target = new Date(dateStr);
+  target.setHours(0, 0, 0, 0);
+  const today = new Date();
+  today.setHours(0, 0, 0, 0);
+  return Math.floor((target.getTime() - today.getTime()) / MS_PER_DAY);
+};
+
+export const dateToIndex = (dateStr: string): number => {
+  const d = new Date(dateStr);
+  return Math.floor((d.getTime() - EPOCH.getTime()) / MS_PER_DAY);
+};
+
+export const todayIndex = (): number => {
+  const now = new Date();
+  now.setHours(0, 0, 0, 0);
+  return Math.floor((now.getTime() - EPOCH.getTime()) / MS_PER_DAY);
+};
+
+export { EPOCH, MS_PER_DAY };
diff --git a/web/src/lib/hotel-utils.ts b/web/src/lib/hotel-utils.ts
index b59994a..e5ba5c2 100644
--- a/web/src/lib/hotel-utils.ts
+++ b/web/src/lib/hotel-utils.ts
@@ -25,7 +25,7 @@ export interface Hotel {
   nights: number;
 }
 
-const EPOCH = new Date(0);
+import { EPOCH, MS_PER_DAY, dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
 
 export const transformProduct = (p: HotelProduct): Hotel => {
   const { id, room_type, date_index, metadata } = p;
@@ -37,14 +37,14 @@ export const transformProduct = (p: HotelProduct): Hotel => {
     // legacy: treat as offset from today
     const today = new Date();
     today.setHours(0, 0, 0, 0);
-    checkIn = new Date(today.getTime() + date_index * 86400000);
+    checkIn = new Date(today.getTime() + date_index * MS_PER_DAY);
   } else {
     // proper: days since epoch
-    checkIn = new Date(EPOCH.getTime() + date_index * 86400000);
+    checkIn = new Date(EPOCH.getTime() + date_index * MS_PER_DAY);
   }
 
   const nights = 1;
-  const checkOut = new Date(checkIn.getTime() + nights * 86400000);
+  const checkOut = new Date(checkIn.getTime() + nights * MS_PER_DAY);
 
   const formatOpts: Intl.DateTimeFormatOptions = {
     month: 'short',
@@ -65,24 +65,34 @@ export const transformProduct = (p: HotelProduct): Hotel => {
   };
 };
 
-// convert date string to days from today
-export const dateToDaysFromToday = (dateStr: string): number => {
-  const target = new Date(dateStr);
-  target.setHours(0, 0, 0, 0);
-  const today = new Date();
-  today.setHours(0, 0, 0, 0);
-  return Math.floor((target.getTime() - today.getTime()) / 86400000);
+const hotelImagePool = [
+  'photo-1566073771259-6a8506099945',
+  'photo-1551882547-ff40c63fe5fa',
+  'photo-1590490360182-c33d57733427',
+  'photo-1582719478250-c89cae4dc85b',
+  'photo-1596701062351-8c2c14d1fdd0',
+  'photo-1631049307264-da0ec9d70304',
+  'photo-1578683010236-d716f9a3f461',
+  'photo-1540518614846-7eded433c457',
+  'photo-1505693416388-ac5ce068fe85',
+  'photo-1522771739844-6a9f6d5f14af',
+  'photo-1562438668-bcf0ca6578f0',
+  'photo-1595576508898-0ad5c879a061',
+];
+
+const hashString = (s: string): number => {
+  let h = 0;
+  for (let i = 0; i < s.length; i++) {
+    h = ((h << 5) - h) + s.charCodeAt(i);
+    h = h & h;
+  }
+  return Math.abs(h);
 };
 
-// convert date string to date_index (days since epoch)
-export const dateToIndex = (dateStr: string): number => {
-  const d = new Date(dateStr);
-  return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
+export const getHotelImageUrl = (hotelId: string, size: { w: number; h: number } = { w: 400, h: 300 }): string => {
+  const idx = hashString(hotelId) % hotelImagePool.length;
+  const photoId = hotelImagePool[idx];
+  return `https://images.unsplash.com/${photoId}?w=${size.w}&h=${size.h}&fit=crop`;
 };
 
-// get current date_index
-export const todayIndex = (): number => {
-  const now = new Date();
-  now.setHours(0, 0, 0, 0);
-  return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
-};
+export { dateToDaysFromToday, dateToIndex, todayIndex };

From 7c330a19c698340341131561934653dc9e109d33 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 15:36:20 +0100
Subject: [PATCH 18/35] feat: added a runner script for agent orchestration

---
 experiments/agents/run.py | 117 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 experiments/agents/run.py

diff --git a/experiments/agents/run.py b/experiments/agents/run.py
new file mode 100644
index 0000000..823c3d9
--- /dev/null
+++ b/experiments/agents/run.py
@@ -0,0 +1,117 @@
+from supabase import create_client, Client
+import os
+import random
+import asyncio
+import json
+from dotenv import load_dotenv
+
+from experiments.agents.agent import get_agent, AgentTypes
+from lib.kafka_client import get_interactions
+
+load_dotenv()
+
+RESULTS="/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
+
+client = create_client(
+    os.getenv("NEXT_PUBLIC_SUPABASE_URL"),
+    os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
+)
+def pick_random_task():
+    mode = 'hotel'
+    tasks = client.table("tasks").select("*").execute().data
+    if mode == 'hotel':
+        # drop all that have 'flight' in the description
+        tasks = [task for task in tasks if 'flight' not in task['task_description'].lower()]
+    return random.choice(tasks) if tasks else None
+
+def clear_kafka_data():
+    """Delete and recreate Kafka topics to clear all data"""
+    from kafka.admin import KafkaAdminClient, NewTopic
+    from kafka.errors import UnknownTopicOrPartitionError
+    import time
+
+    kafka_host = os.getenv('KAFKA_HOST', 'localhost')
+    kafka_port = os.getenv('KAFKA_PORT', '9092')
+    broker = f'{kafka_host}:{kafka_port}'
+
+    admin = KafkaAdminClient(bootstrap_servers=broker)
+    topics = ['user-interactions', 'price-logs']
+
+    try:
+        admin.delete_topics(topics, timeout_ms=5000)
+        print(f"Deleted topics: {topics}")
+        time.sleep(2)
+    except UnknownTopicOrPartitionError:
+        print("Topics don't exist, skipping delete")
+    except Exception as e:
+        print(f"Error deleting topics: {e}")
+
+    new_topics = [
+        NewTopic(name='user-interactions', num_partitions=3, replication_factor=1),
+        NewTopic(name='price-logs', num_partitions=3, replication_factor=1)
+    ]
+
+    try:
+        admin.create_topics(new_topics=new_topics, validate_only=False)
+        print(f"Recreated topics: {topics}")
+    except Exception as e:
+        print(f"Error creating topics: {e}")
+    finally:
+        admin.close()
+
+def create_new_experiment(task_id):
+    import uuid
+    subject_name = f"agent_{str(uuid.uuid4())[:8]}"
+    experiment = {
+        "subject_name": subject_name,
+        "xp_human_only": False,
+        "xp_market_mode": "hotel",
+        "xp_task_id": task_id,
+    }
+    response = client.table("experiments").insert(experiment).execute()
+    return response.data[0] if response.data else None
+
+if __name__ == "__main__":
+    clear_kafka_data()
+
+    task = pick_random_task()
+    if not task:
+        print("No tasks available")
+        exit(1)
+
+    experiment = create_new_experiment(task['id'])
+    exp_id = experiment['id']
+    exp_dir = f"{RESULTS}{exp_id}"
+    os.makedirs(exp_dir, exist_ok=True)
+
+    # construct experiment URL with uuid param
+    base_url = os.getenv('NEXT_PUBLIC_API_BASE', 'http://localhost:3000')
+    agent_url = f"{base_url}/start-task?uuid={exp_id}"
+
+    print(f"Created experiment {exp_id} for task {task['id']}")
+    print(f"Agent will interact with: {agent_url}")
+
+    # instantiate and run agent
+    agent = get_agent(
+        AgentTypes.GENERIC_BROWSER_USE_AGENT,
+        goal=task['task_description'],
+        url=agent_url,
+        timeout=300,
+        headless=True
+    )
+
+    result = asyncio.run(agent.act())
+    print(f"Agent result: {result}")
+
+    # export interaction and price data from kafka
+    interactions = get_interactions(topic='user-interactions', timeout_ms=3000)
+    prices = get_interactions(topic='price-logs', timeout_ms=3000)
+
+    with open(f"{exp_dir}/int.json", 'w') as f:
+        json.dump(interactions, f, indent=2)
+
+    with open(f"{exp_dir}/price.json", 'w') as f:
+        json.dump(prices, f, indent=2)
+
+    print(f"Experiment {exp_id} completed.")
+    print(f"Exported {len(interactions)} interactions and {len(prices)} price logs to {exp_dir}")

From 9cb2b0fc4431f1a10af457d8fc17e1bb6e706032 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 15:37:06 +0100
Subject: [PATCH 19/35] feat: forgot airflow helper staging

---
 tests/e2e/helpers/airflow.ts | 61 ++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 tests/e2e/helpers/airflow.ts

diff --git a/tests/e2e/helpers/airflow.ts b/tests/e2e/helpers/airflow.ts
new file mode 100644
index 0000000..82d4a75
--- /dev/null
+++ b/tests/e2e/helpers/airflow.ts
@@ -0,0 +1,61 @@
+const AIRFLOW_URL = process.env.AIRFLOW_URL || 'http://localhost:8085';
+const AUTH = 'Basic ' + Buffer.from(`${process.env.AIRFLOW_USER || 'admin'}:${process.env.AIRFLOW_PASS || 'admin'}`).toString('base64');
+
+const req = (path: string, opts: any = {}) => {
+  const headers = { Authorization: AUTH, ...opts.headers };
+  return fetch(`${AIRFLOW_URL}${path}`, { ...opts, headers });
+};
+
+export const triggerDag = async (dagId: string, conf = {}) => {
+  const r = await req(`/api/v1/dags/${dagId}/dagRuns`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ conf }),
+  });
+  if (!r.ok) throw new Error(`Trigger DAG failed: ${r.status}`);
+  return (await r.json()).dag_run_id;
+};
+
+export const getDagStatus = async (dagId: string, runId: string) => {
+  const r = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`);
+  if (!r.ok) throw new Error(`Get status failed: ${r.status}`);
+  return (await r.json()).state;
+};
+
+export const cancelDag = async (dagId: string, runId: string) => {
+  const r = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`, {
+    method: 'PATCH',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ state: 'failed' }),
+  });
+  if (!r.ok) console.warn(`Failed to cancel DAG ${runId}: ${r.status}`);
+};
+
+export const waitForDag = async (dagId: string, runId: string, maxMs = 30000, pollMs = 1000) => {
+  const t0 = Date.now();
+  while (Date.now() - t0 < maxMs) {
+    const state = await getDagStatus(dagId, runId);
+    if (state === 'success') return;
+    if (state === 'failed') throw new Error(`DAG ${runId} failed`);
+    await new Promise(r => setTimeout(r, pollMs));
+  }
+  await cancelDag(dagId, runId);
+  throw new Error(`DAG ${runId} timeout`);
+};
+
+export const runDag = async (dagId: string, conf = {}, maxMs = 60000) => {
+  const runId = await triggerDag(dagId, conf);
+  await waitForDag(dagId, runId, maxMs);
+};
+
+export const runSessionPricing = (mode = 'hotel') =>
+  runDag('session_pricing_pipeline', { store_mode: mode, session_limit: 10 }, 90000);
+
+export const runSurgePricing = (mode = 'hotel', highThresh = 10, lowThresh = 2) =>
+  runDag('surge_pricing_pipeline', {
+    store_mode: mode,
+    high_threshold: highThresh,
+    low_threshold: lowThresh,
+    surge_multiplier: 1.2,
+    discount_multiplier: 0.9
+  }, 90000);

From af23d2f736a854fe2f835addc3112c31d3d38e30 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 15:57:05 +0100
Subject: [PATCH 20/35] feat: introduction of agentic MDPs and KL divergence
 of > 2

---
 sim/rl/behavior_loader/loader.py | 20 +++++++
 sim/rl/behavior_loader/models.py | 89 ++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py
index 99a1541..bd18442 100644
--- a/sim/rl/behavior_loader/loader.py
+++ b/sim/rl/behavior_loader/loader.py
@@ -56,7 +56,27 @@ class Loader:
     def get_entries(self) -> tuple[list[str], int]:
         return self.entries, len(self.entries)
 
+class AgentLoader(Loader):
+    """Loader for agent interaction data with simplified schema (direct PayloadModel format)"""
+
+    def _is_admin_page_simple(self, interaction: PayloadModel) -> bool:
+        return interaction.page and interaction.page.startswith("/admin/")
+
+    def _load_sessions(self) -> dict:
+        sessions = {}
+        for entry in self.entries:
+            int_path = f"{self.src_dir}/{entry}/int.json"
+            raw = json.load(open(int_path))
+            ints = [PayloadModel(**i) for i in raw]
+            sessions[entry] = [i for i in ints if not self._is_admin_page_simple(i)]
+        return sessions
+
 if __name__ == "__main__":
+    DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
+    loader = AgentLoader(DIR)
+    _, n = loader.get_entries()
+    print(f"Loaded {n} sessions from {DIR}")
+
     DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
     loader = Loader(DIR)
     _, n = loader.get_entries()
diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index bce2429..7254606 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -1,10 +1,12 @@
-from loader import Loader
+from experiments.agents.base import Agent
+from loader import Loader, AgentLoader
 from collections import defaultdict
 from typing import Dict, List, Tuple, Set
 import numpy as np
 import graphviz
 
 DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
+AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
 
 class BehaviorModel:
     def __init__(self, src_dir: str = DIR):
@@ -85,13 +87,32 @@ class BehaviorModel:
             path.append(curr)
         return path
 
-def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False):
-    """visualize MDP as directed graph using graphviz, aggregated by event type"""
-    if not model.mdp: raise ValueError("build MDP first")
+class AgentBehaviorModel(BehaviorModel):
+    """behavior model for agent interaction data (simplified PayloadModel schema)"""
 
-    # aggregate transitions by event type
+    def __init__(self, src_dir: str = AGENT_DIR):
+        self.loader = AgentLoader(src_dir)
+        self.data = self.loader.get_data()
+        self.entries, self.num_entries = self.loader.get_entries()
+        self.mdp = None
+
+    def _state_repr(self, evt) -> str:
+        # direct access to PayloadModel fields (no .value.payload nesting)
+        return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}"
+
+    def _extract_sessions(self):
+        trajectories = []
+        for sid, evts in self.data.items():
+            if len(evts) < 2: continue
+            # sort by timestamp string (ISO format sorts lexicographically)
+            states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)]
+            trajectories.append(states)
+        return trajectories
+
+def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
+    """aggregate state transitions by event type and normalize"""
     evt_trans = defaultdict(lambda: defaultdict(float))
-    for s, trans in model.mdp['transitions'].items():
+    for s, trans in mdp['transitions'].items():
         evt_src = s.split('|')[2]
         for s_next, prob in trans.items():
             evt_dst = s_next.split('|')[2]
@@ -103,6 +124,13 @@ def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "
         if total > 0:
             for evt_dst in evt_trans[evt_src]:
                 evt_trans[evt_src][evt_dst] /= total
+    return dict(evt_trans)
+
+def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False):
+    """visualize MDP as directed graph using graphviz, aggregated by event type"""
+    if not model.mdp: raise ValueError("build MDP first")
+
+    evt_trans = aggregate_event_transitions(model.mdp)
 
     g = graphviz.Digraph(format=fmt)
     g.attr(rankdir='LR', size='30')
@@ -134,11 +162,50 @@ def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "
 
     return g
 
+
+def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float:
+    """Compute KL divergence D_KL(P || Q) for discrete distributions P and Q."""
+    epsilon = 1e-10  # small constant to avoid log(0)
+    kl_div = 0.0
+    for key in p:
+        p_val = p[key] + epsilon
+        q_val = q.get(key, 0.0) + epsilon
+        kl_div += p_val * np.log(p_val / q_val)
+    return kl_div
+
 if __name__ == "__main__":
-    model = BehaviorModel(DIR)
-    mdp = model.build_MDP()
-    print(f"Built MDP: {mdp['num_states']} states, {sum(len(t) for t in mdp['transitions'].values())} transitions")
-    if not mdp['states']:
+    human_model = BehaviorModel(DIR)
+    human_mdp = human_model.build_MDP()
+    print(f"Built MDP: {human_mdp['num_states']} states, {sum(len(t) for t in human_mdp['transitions'].values())} transitions")
+    if not human_mdp['states']:
         print("No states found")
         exit(1)
-    visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="pdf", export_dot=True)
+    visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True)
+
+    agent_model = AgentBehaviorModel()
+    agent_mdp = agent_model.build_MDP()
+    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, {sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
+    if not agent_mdp['states']:
+        print("No states found")
+        exit(1)
+    visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True)
+
+    # aggregate transitions by event type for both models
+    human_evt_trans = aggregate_event_transitions(human_mdp)
+    agent_evt_trans = aggregate_event_transitions(agent_mdp)
+
+    common_evts = set(human_evt_trans.keys()) & set(agent_evt_trans.keys())
+    if not common_evts: import sys; sys.exit("No common event types for KL divergence analysis")
+
+    kl_divs = []
+    for evt in common_evts:
+        kl = kl_divergence(human_evt_trans[evt], agent_evt_trans[evt])
+        kl_divs.append((evt, kl))
+
+    kl_divs.sort(key=lambda x: x[1], reverse=True)
+    avg_kl = np.mean([kl for _, kl in kl_divs])
+
+    print(f"Average KL divergence: {avg_kl:.4f}")
+    print(f"\nMost divergent event types:")
+    for evt, kl in kl_divs:
+        print(f"  {evt}: {kl:.4f}")

From 87a35fad2c9c0954de5332edf4a55b53ca6b7049 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 16:42:50 +0100
Subject: [PATCH 21/35] feat: joint loader

---
 sim/rl/behavior_loader/loader.py | 47 ++++++++++++++++++++++++++------
 sim/rl/behavior_loader/models.py | 32 +++++++++++++++++++++-
 2 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py
index bd18442..620576c 100644
--- a/sim/rl/behavior_loader/loader.py
+++ b/sim/rl/behavior_loader/loader.py
@@ -71,13 +71,44 @@ class AgentLoader(Loader):
             sessions[entry] = [i for i in ints if not self._is_admin_page_simple(i)]
         return sessions
 
-if __name__ == "__main__":
-    DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
-    loader = AgentLoader(DIR)
-    _, n = loader.get_entries()
-    print(f"Loaded {n} sessions from {DIR}")
+class JointLoader:
+    """Loader for combined human (Kafka) and agent (direct) data without discrimination"""
 
-    DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
-    loader = Loader(DIR)
+    def __init__(self, human_dir: str, agent_dir: str):
+        self.human_dir = human_dir
+        self.agent_dir = agent_dir
+        self.human_loader = Loader(human_dir)
+        self.agent_loader = AgentLoader(agent_dir)
+        self.data = self._load_joint_sessions()
+        self.entries = list(self.data.keys())
+
+    def _load_joint_sessions(self) -> dict:
+        sessions = {}
+        # load human sessions (unwrap from Kafka format to PayloadModel)
+        for sid, evts in self.human_loader.get_data().items():
+            sessions[f"human_{sid}"] = [evt.value.payload for evt in evts]
+        # load agent sessions (already PayloadModel)
+        for sid, evts in self.agent_loader.get_data().items():
+            sessions[f"agent_{sid}"] = evts
+        return sessions
+
+    def get_data(self) -> dict:
+        return self.data
+
+    def get_entries(self) -> tuple[list[str], int]:
+        return self.entries, len(self.entries)
+
+if __name__ == "__main__":
+    AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
+    loader = AgentLoader(AGENT_DIR)
     _, n = loader.get_entries()
-    print(f"Loaded {n} sessions from {DIR}")
+    print(f"Loaded {n} agent sessions from {AGENT_DIR}")
+
+    HUMAN_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
+    loader = Loader(HUMAN_DIR)
+    _, n = loader.get_entries()
+    print(f"Loaded {n} human sessions from {HUMAN_DIR}")
+
+    joint_loader = JointLoader(HUMAN_DIR, AGENT_DIR)
+    _, n = joint_loader.get_entries()
+    print(f"Loaded {n} total sessions (combined) from joint loader")
diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index 7254606..46ac99d 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -1,5 +1,5 @@
 from experiments.agents.base import Agent
-from loader import Loader, AgentLoader
+from loader import Loader, AgentLoader, JointLoader
 from collections import defaultdict
 from typing import Dict, List, Tuple, Set
 import numpy as np
@@ -109,6 +109,28 @@ class AgentBehaviorModel(BehaviorModel):
             trajectories.append(states)
         return trajectories
 
+class JointBehaviorModel(BehaviorModel):
+    """behavior model for combined human+agent data (flat PayloadModel distribution)"""
+
+    def __init__(self, human_dir: str = DIR, agent_dir: str = AGENT_DIR):
+        self.loader = JointLoader(human_dir, agent_dir)
+        self.data = self.loader.get_data()
+        self.entries, self.num_entries = self.loader.get_entries()
+        self.mdp = None
+
+    def _state_repr(self, evt) -> str:
+        # direct access to PayloadModel fields (JointLoader unwraps to PayloadModel)
+        return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}"
+
+    def _extract_sessions(self):
+        trajectories = []
+        for sid, evts in self.data.items():
+            if len(evts) < 2: continue
+            # sort by timestamp string (ISO format sorts lexicographically)
+            states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)]
+            trajectories.append(states)
+        return trajectories
+
 def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
     """aggregate state transitions by event type and normalize"""
     evt_trans = defaultdict(lambda: defaultdict(float))
@@ -209,3 +231,11 @@ if __name__ == "__main__":
     print(f"\nMost divergent event types:")
     for evt, kl in kl_divs:
         print(f"  {evt}: {kl:.4f}")
+
+    # build joint model (combined distribution)
+    print("\n=== Joint Model (Human + Agent Combined) ===")
+    joint_model = JointBehaviorModel()
+    joint_mdp = joint_model.build_MDP()
+    print(f"Built joint MDP: {joint_mdp['num_states']} states, {sum(len(t) for t in joint_mdp['transitions'].values())} transitions")
+    if joint_mdp['states']:
+        visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True)

From 82b54428b7494858597dfd91acaa7733378362aa Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 16:46:17 +0100
Subject: [PATCH 22/35] chore: refactor the loader class

---
 sim/rl/behavior_loader/loader.py | 67 ++++++++++++--------------------
 1 file changed, 25 insertions(+), 42 deletions(-)

diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py
index 620576c..3336956 100644
--- a/sim/rl/behavior_loader/loader.py
+++ b/sim/rl/behavior_loader/loader.py
@@ -1,6 +1,6 @@
 import os
-from pydantic import BaseModel as Base
 import json
+from pydantic import BaseModel as Base
 
 class PayloadModel(Base):
     sessionId: str
@@ -30,6 +30,9 @@ class InteractionModel(Base):
     key: dict
     value: ValueModel
 
+def _is_admin(page: str | None) -> bool:
+    return page is not None and page.startswith("/admin/")
+
 class Loader:
     def __init__(self, src_dir: str):
         self.src_dir = src_dir
@@ -37,17 +40,13 @@ class Loader:
         if not self.entries: raise ValueError("empty directory")
         self.data = self._load_sessions()
 
-    def _is_admin_page(self, interaction: InteractionModel) -> bool:
-        page = interaction.value.payload.page
-        return page and page.startswith("/admin/")
-
     def _load_sessions(self) -> dict:
         sessions = {}
         for entry in self.entries:
-            int_path = f"{self.src_dir}/{entry}/int.json"
-            raw = json.load(open(int_path))
+            with open(f"{self.src_dir}/{entry}/int.json") as f:
+                raw = json.load(f)
             ints = [InteractionModel(**i) for i in raw]
-            sessions[entry] = [i for i in ints if not self._is_admin_page(i)]
+            sessions[entry] = [i for i in ints if not _is_admin(i.value.payload.page)]
         return sessions
 
     def get_data(self) -> dict:
@@ -57,40 +56,29 @@ class Loader:
         return self.entries, len(self.entries)
 
 class AgentLoader(Loader):
-    """Loader for agent interaction data with simplified schema (direct PayloadModel format)"""
-
-    def _is_admin_page_simple(self, interaction: PayloadModel) -> bool:
-        return interaction.page and interaction.page.startswith("/admin/")
-
     def _load_sessions(self) -> dict:
         sessions = {}
         for entry in self.entries:
-            int_path = f"{self.src_dir}/{entry}/int.json"
-            raw = json.load(open(int_path))
+            with open(f"{self.src_dir}/{entry}/int.json") as f:
+                raw = json.load(f)
             ints = [PayloadModel(**i) for i in raw]
-            sessions[entry] = [i for i in ints if not self._is_admin_page_simple(i)]
+            sessions[entry] = [i for i in ints if not _is_admin(i.page)]
         return sessions
 
 class JointLoader:
-    """Loader for combined human (Kafka) and agent (direct) data without discrimination"""
-
     def __init__(self, human_dir: str, agent_dir: str):
-        self.human_dir = human_dir
-        self.agent_dir = agent_dir
         self.human_loader = Loader(human_dir)
         self.agent_loader = AgentLoader(agent_dir)
-        self.data = self._load_joint_sessions()
+        self.data = self._merge()
         self.entries = list(self.data.keys())
 
-    def _load_joint_sessions(self) -> dict:
-        sessions = {}
-        # load human sessions (unwrap from Kafka format to PayloadModel)
-        for sid, evts in self.human_loader.get_data().items():
-            sessions[f"human_{sid}"] = [evt.value.payload for evt in evts]
-        # load agent sessions (already PayloadModel)
-        for sid, evts in self.agent_loader.get_data().items():
-            sessions[f"agent_{sid}"] = evts
-        return sessions
+    def _merge(self) -> dict:
+        return {
+            **{f"human_{sid}": [e.value.payload for e in evts]
+               for sid, evts in self.human_loader.get_data().items()},
+            **{f"agent_{sid}": evts
+               for sid, evts in self.agent_loader.get_data().items()}
+        }
 
     def get_data(self) -> dict:
         return self.data
@@ -99,16 +87,11 @@ class JointLoader:
         return self.entries, len(self.entries)
 
 if __name__ == "__main__":
-    AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
-    loader = AgentLoader(AGENT_DIR)
-    _, n = loader.get_entries()
-    print(f"Loaded {n} agent sessions from {AGENT_DIR}")
+    agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
+    human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
 
-    HUMAN_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
-    loader = Loader(HUMAN_DIR)
-    _, n = loader.get_entries()
-    print(f"Loaded {n} human sessions from {HUMAN_DIR}")
-
-    joint_loader = JointLoader(HUMAN_DIR, AGENT_DIR)
-    _, n = joint_loader.get_entries()
-    print(f"Loaded {n} total sessions (combined) from joint loader")
+    for name, cls, path in [("agent", AgentLoader, agent_dir),
+                             ("human", Loader, human_dir),
+                             ("joint", lambda d: JointLoader(human_dir, d), agent_dir)]:
+        ldr = cls(path) if name != "joint" else cls(agent_dir)
+        print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions")

From e9cf5f07367e3ad85b94caaf038eb7a0e6f8d852 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 16:51:00 +0100
Subject: [PATCH 23/35] refactor models computations

---
 sim/rl/behavior_loader/models.py | 186 ++++++++++++-------------------
 1 file changed, 69 insertions(+), 117 deletions(-)

diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index 46ac99d..84c2fe4 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -1,16 +1,12 @@
-from experiments.agents.base import Agent
 from loader import Loader, AgentLoader, JointLoader
 from collections import defaultdict
 from typing import Dict, List, Tuple, Set
 import numpy as np
 import graphviz
 
-DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
-AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
-
 class BehaviorModel:
-    def __init__(self, src_dir: str = DIR):
-        self.loader = Loader(src_dir)
+    def __init__(self, src_dir: str, loader_cls=Loader):
+        self.loader = loader_cls(src_dir)
         self.data = self.loader.get_data()
         self.entries, self.num_entries = self.loader.get_entries()
         self.mdp = None
@@ -19,50 +15,48 @@ class BehaviorModel:
         p = evt.value.payload
         return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}"
 
-    def _extract_sessions(self):
-        # transform raw events into sequential state trajectories per session
-        trajectories = []
-        for sid, evts in self.data.items():
-            if len(evts) < 2: continue
-            states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.timestamp)]
-            trajectories.append(states)
-        return trajectories
+    def _sort_key(self, evt):
+        return evt.timestamp
 
-    def _calc_transitions(self, trajectories: List[List[str]]) -> Tuple[Dict, Set]:
-        trans = defaultdict(lambda: defaultdict(int))
-        states = set()
-        for traj in trajectories:
-            for i in range(len(traj) - 1):
-                s, s_next = traj[i], traj[i+1]
+    def _extract_sessions(self) -> List[List[str]]:
+        trajs = []
+        for evts in self.data.values():
+            if len(evts) < 2: continue
+            states = [self._state_repr(e) for e in sorted(evts, key=self._sort_key)]
+            trajs.append(states)
+        return trajs
+
+    def _calc_transitions(self, trajs: List[List[str]]) -> Tuple[Dict, Set]:
+        trans, states = defaultdict(lambda: defaultdict(int)), set()
+        for traj in trajs:
+            for s, s_next in zip(traj, traj[1:]):
                 trans[s][s_next] += 1
                 states.update([s, s_next])
         return trans, states
 
-    def _calc_rewards(self, trajectories: List[List[str]]) -> Dict:
-        # reward based on session progression depth
+    def _calc_rewards(self, trajs: List[List[str]]) -> Dict:
         rwd = defaultdict(list)
-        for traj in trajectories:
+        for traj in trajs:
             n = len(traj)
             for i, s in enumerate(traj):
                 rwd[s].append(i / n)
         return rwd
 
-    def _normalize_trans(self, counts: Dict) -> Dict:
+    def _normalize_trans(self, cnts: Dict) -> Dict:
         return {s: {s_n: cnt/sum(nxt.values()) for s_n, cnt in nxt.items()}
-                for s, nxt in counts.items()}
+                for s, nxt in cnts.items()}
 
     def build_MDP(self) -> Dict:
         trajs = self._extract_sessions()
         trans_cnt, states = self._calc_transitions(trajs)
         trans_prob = self._normalize_trans(trans_cnt)
         state_rwd = self._calc_rewards(trajs)
-        state_val = {s: np.mean(r) for s, r in state_rwd.items()}
 
         self.mdp = {
-            'states': sorted(list(states)),
+            'states': sorted(states),
             'num_states': len(states),
             'transitions': trans_prob,
-            'state_values': state_val,
+            'state_values': {s: np.mean(r) for s, r in state_rwd.items()},
             'state_rewards': state_rwd,
             'trans_counts': trans_cnt,
         }
@@ -78,8 +72,7 @@ class BehaviorModel:
 
     def sample_traj(self, start: str, max_len: int = 50) -> List[str]:
         if not self.mdp: raise ValueError("build MDP first")
-        path = [start]
-        curr = start
+        path, curr = [start], start
         for _ in range(max_len):
             nxt = self.mdp['transitions'].get(curr, {})
             if not nxt: break
@@ -88,154 +81,113 @@ class BehaviorModel:
         return path
 
 class AgentBehaviorModel(BehaviorModel):
-    """behavior model for agent interaction data (simplified PayloadModel schema)"""
-
-    def __init__(self, src_dir: str = AGENT_DIR):
-        self.loader = AgentLoader(src_dir)
-        self.data = self.loader.get_data()
-        self.entries, self.num_entries = self.loader.get_entries()
-        self.mdp = None
+    def __init__(self, src_dir: str):
+        super().__init__(src_dir, AgentLoader)
 
     def _state_repr(self, evt) -> str:
-        # direct access to PayloadModel fields (no .value.payload nesting)
         return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}"
 
-    def _extract_sessions(self):
-        trajectories = []
-        for sid, evts in self.data.items():
-            if len(evts) < 2: continue
-            # sort by timestamp string (ISO format sorts lexicographically)
-            states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)]
-            trajectories.append(states)
-        return trajectories
+    def _sort_key(self, evt):
+        return evt.ts
 
 class JointBehaviorModel(BehaviorModel):
-    """behavior model for combined human+agent data (flat PayloadModel distribution)"""
-
-    def __init__(self, human_dir: str = DIR, agent_dir: str = AGENT_DIR):
+    def __init__(self, human_dir: str, agent_dir: str):
         self.loader = JointLoader(human_dir, agent_dir)
         self.data = self.loader.get_data()
         self.entries, self.num_entries = self.loader.get_entries()
         self.mdp = None
 
     def _state_repr(self, evt) -> str:
-        # direct access to PayloadModel fields (JointLoader unwraps to PayloadModel)
         return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}"
 
-    def _extract_sessions(self):
-        trajectories = []
-        for sid, evts in self.data.items():
-            if len(evts) < 2: continue
-            # sort by timestamp string (ISO format sorts lexicographically)
-            states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)]
-            trajectories.append(states)
-        return trajectories
+    def _sort_key(self, evt):
+        return evt.ts
 
 def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
-    """aggregate state transitions by event type and normalize"""
     evt_trans = defaultdict(lambda: defaultdict(float))
     for s, trans in mdp['transitions'].items():
-        evt_src = s.split('|')[2]
+        src = s.split('|')[2]
         for s_next, prob in trans.items():
-            evt_dst = s_next.split('|')[2]
-            evt_trans[evt_src][evt_dst] += prob
+            dst = s_next.split('|')[2]
+            evt_trans[src][dst] += prob
 
-    # normalize aggregated transitions
-    for evt_src in evt_trans:
-        total = sum(evt_trans[evt_src].values())
+    for src in evt_trans:
+        total = sum(evt_trans[src].values())
         if total > 0:
-            for evt_dst in evt_trans[evt_src]:
-                evt_trans[evt_src][evt_dst] /= total
+            evt_trans[src] = {dst: p/total for dst, p in evt_trans[src].items()}
     return dict(evt_trans)
 
-def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False):
-    """visualize MDP as directed graph using graphviz, aggregated by event type"""
+def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph",
+                  fmt: str = "svg", view: bool = False, export_dot: bool = False):
     if not model.mdp: raise ValueError("build MDP first")
 
     evt_trans = aggregate_event_transitions(model.mdp)
-
     g = graphviz.Digraph(format=fmt)
     g.attr(rankdir='LR', size='30')
     g.attr('node', shape='circle', width='1', height='1')
 
-    # collect all event types
-    events = set(evt_trans.keys())
-    for trans in evt_trans.values():
-        events.update(trans.keys())
-
-    # add nodes for each event type
+    events = set(evt_trans.keys()) | {e for trans in evt_trans.values() for e in trans.keys()}
     for evt in events:
         g.node(evt)
 
-    # add edges above threshold
-    for evt_src in evt_trans:
-        for evt_dst, prob in evt_trans[evt_src].items():
+    for src, dsts in evt_trans.items():
+        for dst, prob in dsts.items():
             if prob > threshold:
-                g.edge(evt_src, evt_dst, label=f'{prob:.2f}')
+                g.edge(src, dst, label=f'{prob:.2f}')
 
     g.render(output, view=view, cleanup=True)
     print(f"Saved MDP graph to {output}.{fmt}")
 
     if export_dot:
-        dot_file = f"{output}.dot"
-        with open(dot_file, 'w') as f:
+        with open(f"{output}.dot", 'w') as f:
             f.write(g.source)
-        print(f"Exported DOT source to {dot_file}")
+        print(f"Exported DOT source to {output}.dot")
 
     return g
 
-
 def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float:
-    """Compute KL divergence D_KL(P || Q) for discrete distributions P and Q."""
-    epsilon = 1e-10  # small constant to avoid log(0)
-    kl_div = 0.0
-    for key in p:
-        p_val = p[key] + epsilon
-        q_val = q.get(key, 0.0) + epsilon
-        kl_div += p_val * np.log(p_val / q_val)
-    return kl_div
+    eps = 1e-10
+    return sum((p[k] + eps) * np.log((p[k] + eps) / (q.get(k, 0.0) + eps)) for k in p)
 
 if __name__ == "__main__":
-    human_model = BehaviorModel(DIR)
+    base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
+    human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
+
+    human_model = BehaviorModel(human_dir)
     human_mdp = human_model.build_MDP()
-    print(f"Built MDP: {human_mdp['num_states']} states, {sum(len(t) for t in human_mdp['transitions'].values())} transitions")
+    print(f"Built MDP: {human_mdp['num_states']} states, "
+          f"{sum(len(t) for t in human_mdp['transitions'].values())} transitions")
     if not human_mdp['states']:
-        print("No states found")
-        exit(1)
+        exit("No states found")
     visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True)
 
-    agent_model = AgentBehaviorModel()
+    agent_model = AgentBehaviorModel(agent_dir)
     agent_mdp = agent_model.build_MDP()
-    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, {sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
+    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, "
+          f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
     if not agent_mdp['states']:
-        print("No states found")
-        exit(1)
+        exit("No states found")
     visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True)
 
-    # aggregate transitions by event type for both models
-    human_evt_trans = aggregate_event_transitions(human_mdp)
-    agent_evt_trans = aggregate_event_transitions(agent_mdp)
+    human_evt = aggregate_event_transitions(human_mdp)
+    agent_evt = aggregate_event_transitions(agent_mdp)
+    common = set(human_evt.keys()) & set(agent_evt.keys())
 
-    common_evts = set(human_evt_trans.keys()) & set(agent_evt_trans.keys())
-    if not common_evts: import sys; sys.exit("No common event types for KL divergence analysis")
+    if not common:
+        exit("No common event types for KL divergence analysis")
 
-    kl_divs = []
-    for evt in common_evts:
-        kl = kl_divergence(human_evt_trans[evt], agent_evt_trans[evt])
-        kl_divs.append((evt, kl))
+    kl_divs = sorted([(e, kl_divergence(human_evt[e], agent_evt[e])) for e in common],
+                     key=lambda x: x[1], reverse=True)
 
-    kl_divs.sort(key=lambda x: x[1], reverse=True)
-    avg_kl = np.mean([kl for _, kl in kl_divs])
-
-    print(f"Average KL divergence: {avg_kl:.4f}")
-    print(f"\nMost divergent event types:")
+    print(f"Average KL divergence: {np.mean([kl for _, kl in kl_divs]):.4f}")
+    print("\nMost divergent event types:")
     for evt, kl in kl_divs:
         print(f"  {evt}: {kl:.4f}")
 
-    # build joint model (combined distribution)
     print("\n=== Joint Model (Human + Agent Combined) ===")
-    joint_model = JointBehaviorModel()
+    joint_model = JointBehaviorModel(human_dir, agent_dir)
     joint_mdp = joint_model.build_MDP()
-    print(f"Built joint MDP: {joint_mdp['num_states']} states, {sum(len(t) for t in joint_mdp['transitions'].values())} transitions")
+    print(f"Built joint MDP: {joint_mdp['num_states']} states, "
+          f"{sum(len(t) for t in joint_mdp['transitions'].values())} transitions")
     if joint_mdp['states']:
         visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True)

From 0ce12fbc3beb086f627b1269e470170f76c319d2 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 13 Jan 2026 19:50:36 +0100
Subject: [PATCH 24/35] chore: ignores

---
 .gitignore | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9db7742..ef6746f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,18 +5,22 @@
 **/.virtual_documents/
 **/session_*.svg
 **/*graph.svg
-paper/src/bib/auto
+**/auto/*.el
+*.old
+**/package-lock.json
+**/*.parquet
 
-# Airflow logs - exclude DAG run logs
+paper/src/auto/*
+paper/src/bib/auto
+docs/goals/*.md
+PHANTOM.wiki/
 experiments/airflow/logs/*
 experiments/airflow/logs/scheduler/
 experiments/airflow/logs/dag_processor_manager/
-experiments/collected_data/*
-
-paper/src/auto/*
-lib/
-docs/goals/*.md
-PHANTOM.wiki/
+experiments/collected_data/
+experiments/agents/collected_data/
+sim/rl/behavior_loader/*.dot
+sim/rl/behavior_loader/*.png
+sim/rl/behavior_loader/*.svg
+sim/rl/behavior_loader/*.pdf
 tests/e2e/node_modules/**
-**/auto/*.el
-*.old

From 7b2d80ac4c96f4583028e4a049265be459662a72 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Tue, 20 Jan 2026 21:00:47 +0100
Subject: [PATCH 25/35] feat: wip contaminator

---
 experiments/procesing/contaminator.py | 44 +++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 experiments/procesing/contaminator.py

diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py
new file mode 100644
index 0000000..0a3651d
--- /dev/null
+++ b/experiments/procesing/contaminator.py
@@ -0,0 +1,44 @@
+import pandas as pd
+import random
+from sim.rl.behavior_loader import AgentBehaviorModel
+
+base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
+human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
+
+
+
+def remap_schema(df : pd.DataFrame, mapping: dict, on: str = "event_type"):
+    df = df.copy()
+    df[on] = df[on].map(mapping).fillna(df[on])
+    return df
+
+
+def contaminate_dataset(df : pd.DataFrame, on : str = "event_type",
+                        contamination_rate: float = 0.1) -> pd.DataFrame:
+    model = AgentBehaviorModel(agent_dir)
+    target_df_schema = df[on].unique().tolist() # NOTE(review): not read again in this function
+    mapping = {
+        'view': 'view_page'
+        # TODO: define properly for the given dataset
+    }
+    OG_event_distribution = df[on].value_counts(normalize=True).to_dict()
+    # normalize to weights
+    OG_event_distribution = {k: v / sum(OG_event_distribution.values()) for k, v in OG_event_distribution.items()}
+    mapped_df = remap_schema(df, mapping, on=on) # NOTE(review): not read again in this function
+    N = len(df)
+    N_final = N / (1 - contamination_rate) # target size so that (N_final - N) / N_final == contamination_rate; TODO: explain this in paper
+    N_contaminate = int(N_final - N)
+    start_event_types = random.choices(list(OG_event_distribution.keys()),
+                                    weights=list(OG_event_distribution.values()), k=N_contaminate)
+    # one start event type is drawn per sampled trajectory, weighted by the original event distribution
+    new_trajectories = []
+    for start_event in start_event_types:
+        # sample from the original start event
+        start = None # TODO: define start according to dataset (randomly sample with weights of event distribution)
+        trajectory = model.sample_trajectory(start) # TODO: explain this method in paper; NOTE(review): the models.py hunks show BehaviorModel.sample_traj(start, max_len) - confirm sample_trajectory exists
+        new_trajectories.extend(trajectory) # NOTE(review): extends by every element of each trajectory, so the number of added rows may exceed N_contaminate - confirm intended
+
+    # TODO: make sure the new trajectories' schema conforms with the dataset
+    contaminate_df = pd.DataFrame(new_trajectories)
+    df = pd.concat([df, contaminate_df], ignore_index=True)
+    return df

From b2f0746c01585a4fc6189feed7b0244be4d5be3b Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 11:11:49 +0100
Subject: [PATCH 26/35] chore: extra commenting

---
 experiments/procesing/contaminator.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py
index 0a3651d..da44c3d 100644
--- a/experiments/procesing/contaminator.py
+++ b/experiments/procesing/contaminator.py
@@ -1,9 +1,9 @@
 import pandas as pd
 import random
-from sim.rl.behavior_loader import AgentBehaviorModel
+from sim.rl.behavior_loader import AgentBehaviorModel # TODO: proper import this
 
 base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
-human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
+agent_dir = f"{base_dir}/agents/collected_data/"
 
 
 
@@ -21,6 +21,7 @@ def contaminate_dataset(df : pd.DataFrame, on : str = "event_type",
         'view': 'view_page'
         # TODO: define properly for the given dataset
     }
+    # think about replacing with freqdist method from library
     OG_event_distribution = df[on].value_counts(normalize=True).to_dict()
     # normalize to weights
     OG_event_distribution = {k: v / sum(OG_event_distribution.values()) for k, v in OG_event_distribution.items()}

From 04907df393149c61ff3efd3ffcdfa37bed2d8db5 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 11:27:03 +0100
Subject: [PATCH 27/35] feat: weak train scaffold

---
 experiments/ml/arch.py       | 117 +++--------------------------------
 experiments/ml/weak.train.py |  30 +++++++++
 2 files changed, 39 insertions(+), 108 deletions(-)
 create mode 100644 experiments/ml/weak.train.py

diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py
index 4f36e18..a187959 100644
--- a/experiments/ml/arch.py
+++ b/experiments/ml/arch.py
@@ -12,111 +12,12 @@ TASK = 'classification'
 LABELS = ['human', 'agent']
 
 
-class BaseAgentClassifier(BaseEstimator, ClassifierMixin, ABC):
-    """Base class for tree-based agent detection classifiers with common logic"""
-
-    def __init__(self, context: Optional[PipelineContext] = None, n_estimators: int = 200,
-                 max_depth: int = 6, learning_rate: float = 0.05,
-                 early_stopping_rounds: int = 20):
-        self.context = context
-        self.n_estimators = n_estimators
-        self.max_depth = max_depth
-        self.learning_rate = learning_rate
-        self.early_stopping_rounds = early_stopping_rounds
-        self.model_ = None
-        self.feature_names_ = None
-
-    def _to_array(self, X):
-        """Convert pandas structures to numpy arrays"""
-        return X.values if isinstance(X, (pd.DataFrame, pd.Series)) else X
-
-    def _compute_pos_weight(self, y_arr):
-        """Calculate scale_pos_weight for class imbalance handling"""
-        n_neg, n_pos = (y_arr == 0).sum(), (y_arr == 1).sum()
-        return n_neg / n_pos if n_pos > 0 else 1.0
-
-    def _prepare_eval_set(self, eval_set):
-        """Convert eval_set to numpy arrays if needed"""
-        if not eval_set:
-            return None
-        X_val, y_val = eval_set[0]
-        return [(self._to_array(X_val), self._to_array(y_val))]
-
-    @abstractmethod
-    def _build_model(self, scale_pos: float):
-        """Build the underlying model instance (must be implemented by subclasses)"""
-        pass
-
-    @abstractmethod
-    def _fit_with_eval(self, X_arr, y_arr, eval_arr):
-        """Fit model with evaluation set (must be implemented by subclasses)"""
-        pass
-
-    def fit(self, X, y, eval_set=None):
-        X_arr, y_arr = self._to_array(X), self._to_array(y)
-
-        if isinstance(X, pd.DataFrame):
-            self.feature_names_ = X.columns.tolist()
-
-        scale_pos = self._compute_pos_weight(y_arr)
-        self.model_ = self._build_model(scale_pos)
-
-        eval_arr = self._prepare_eval_set(eval_set)
-        if eval_arr:
-            self._fit_with_eval(X_arr, y_arr, eval_arr)
-        else:
-            self.model_.fit(X_arr, y_arr)
-
-        return self
-
-    def predict(self, X):
-        return self.model_.predict(self._to_array(X))
-
-    def predict_proba(self, X):
-        return self.model_.predict_proba(self._to_array(X))
-
-    @property
-    def feature_importances_(self):
-        return self.model_.feature_importances_ if self.model_ else None
-
-
-class XGBoostAgentClassifier(BaseAgentClassifier):
-    """XGBoost binary classifier for agent detection with class imbalance handling"""
-
-    def _build_model(self, scale_pos: float):
-        return xgb.XGBClassifier(
-            n_estimators=self.n_estimators,
-            max_depth=self.max_depth,
-            learning_rate=self.learning_rate,
-            scale_pos_weight=scale_pos,
-            eval_metric='auc',
-            early_stopping_rounds=self.early_stopping_rounds,
-            random_state=42,
-            tree_method='hist',
-            enable_categorical=False
-        )
-
-    def _fit_with_eval(self, X_arr, y_arr, eval_arr):
-        self.model_.fit(X_arr, y_arr, eval_set=eval_arr, verbose=False)
-
-
-class LightGBMAgentClassifier(BaseAgentClassifier):
-    """LightGBM binary classifier for agent detection with class imbalance handling"""
-
-    def _build_model(self, scale_pos: float):
-        return lgb.LGBMClassifier(
-            n_estimators=self.n_estimators,
-            max_depth=self.max_depth,
-            learning_rate=self.learning_rate,
-            scale_pos_weight=scale_pos,
-            metric='auc',
-            random_state=42,
-            verbosity=-1
-        )
-
-    def _fit_with_eval(self, X_arr, y_arr, eval_arr):
-        self.model_.fit(
-            X_arr, y_arr,
-            eval_set=eval_arr,
-            callbacks=[lgb.early_stopping(self.early_stopping_rounds, verbose=False)]
-        )
+class WeakClassifier(BaseEstimator, ClassifierMixin, ABC):
+    # a simple contrastive machine learning model
+    # this model should learn to distinguish between human and agent behavior
+    # using a weakly supervised approach and contrastive learning + augmentation
+    #
+    def __init__(self, **kwargs):
+        super().__init__()
+        self.model = None
+        self.kwargs = kwargs
diff --git a/experiments/ml/weak.train.py b/experiments/ml/weak.train.py
new file mode 100644
index 0000000..36e11ee
--- /dev/null
+++ b/experiments/ml/weak.train.py
@@ -0,0 +1,30 @@
+from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader
+from sim.rl.behavior_loader.loader import PayloadModel
+from arch import WeakClassifier
+
+agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
+human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
+
+def augment_trajectory(trajectory : list[PayloadModel], augmentation_rate: float = 0.1) -> list[PayloadModel]:
+    # augmentations possible:
+    # return a sub-trajectory window of the original trajectory
+    # insert random noise events
+    # shuffle a few events (find a few indices and swap them with i+1 neighbor)
+    # adjust metadata
+    return trajectory
+
+
+def train():
+    pass
+
+
+
+if __name__ == "__main__":
+    joint_loader = JointLoader(human_dir, agent_dir)
+    data = joint_loader.get_data()
+    entries, num_entries = joint_loader.get_entries()
+    print(f"Loaded {num_entries} entries")
+    # TODO: augment
+    # fit model
+    model = WeakClassifier()
+    model.fit(data)

From b05b510f7098778c84ea84636f7958ded3e558d3 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 14:05:30 +0100
Subject: [PATCH 28/35] strong dataset gathering

---
 sim/strong_learner/data.py | 99 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 sim/strong_learner/data.py

diff --git a/sim/strong_learner/data.py b/sim/strong_learner/data.py
new file mode 100644
index 0000000..80129aa
--- /dev/null
+++ b/sim/strong_learner/data.py
@@ -0,0 +1,99 @@
+import os, requests, py7zr
+import pandas as pd
+from typing import Generator
+try:
+    from sim.rl.behavior_loader.loader import PayloadModel, ValueModel, InteractionModel, Loader
+except ImportError:
+    from loader import PayloadModel, ValueModel, InteractionModel, Loader
+
+class YooChooseLoader(Loader):
+    URL = "https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z"
+    CLICK_COLS = ['session_id', 'ts', 'item_id', 'category']
+    BUY_COLS = ['session_id', 'ts', 'item_id', 'price', 'quantity']
+
+    def __init__(self, root_dir: str = "data/yoochoose", chunk_size: int = 500_000, max_sessions: int = 1000):
+        self.root = root_dir
+        self.chunk_size = chunk_size
+        self.max_sessions = max_sessions
+        self.click_path = f"{root_dir}/yoochoose-clicks.dat"
+        self.buy_path = f"{root_dir}/yoochoose-buys.dat"
+        if not os.path.exists(self.click_path): self._setup()
+        self.data = self._load_sessions(max_sessions)
+        self.entries = list(self.data.keys())
+
+    def _setup(self):
+        os.makedirs(self.root, exist_ok=True)
+        zip_path = f"{self.root}/temp.7z"
+        with requests.get(self.URL, stream=True) as r:
+            with open(zip_path, 'wb') as f:
+                for chunk in r.iter_content(8192): f.write(chunk)
+        with py7zr.SevenZipFile(zip_path, 'r') as z: z.extractall(self.root)
+        os.remove(zip_path)
+
+    def _make_interaction(self, sid: str, ts: str, item_id: str, event: str, page: str, meta: dict) -> InteractionModel:
+        payload = PayloadModel(
+            sessionId=sid, experimentId=None, eventName=event,
+            page=page, productId=item_id, metadata=meta,
+            storeMode="yoochoose", userAgent="dataset", ts=ts
+        )
+        return InteractionModel(
+            partitionID=0, offset=0, timestamp=0, compression="",
+            isTransactional=False, headers=[], key={},
+            value=ValueModel(payload=payload, encoding="json", isPayloadNull=False, schemaId=1, size=0)
+        )
+
+    def _parse_category(self, cat) -> str:  # map a raw click-category code to a page label
+        if pd.isna(cat) or cat == "0": return "unknown"  # missing value or the literal "0" code
+        if cat == "S": return "special_offer"
+        try:
+            n = int(cat)
+            return f"category_{n}" if 1 <= n <= 12 else f"brand_{n}"  # 1..12 -> category_N, any other integer -> brand_N
+        except (TypeError, ValueError, OverflowError): return str(cat)  # non-integer code passes through as text; a bare except would also swallow KeyboardInterrupt/SystemExit
+
+    def stream_clicks(self) -> Generator[InteractionModel, None, None]:
+        with pd.read_csv(self.click_path, names=self.CLICK_COLS, chunksize=self.chunk_size, header=None) as reader:
+            for chunk in reader:
+                for r in chunk.itertuples(index=False):
+                    yield self._make_interaction(
+                        str(r.session_id), r.ts, str(r.item_id),
+                        "view_item_page", self._parse_category(r.category), {}
+                    )
+
+    def stream_buys(self) -> Generator[InteractionModel, None, None]:
+        with pd.read_csv(self.buy_path, names=self.BUY_COLS, chunksize=self.chunk_size, header=None) as reader:
+            for chunk in reader:
+                for r in chunk.itertuples(index=False):
+                    yield self._make_interaction(
+                        str(r.session_id), r.ts, str(r.item_id),
+                        "purchase_complete", "/checkout", {"price": r.price, "quantity": r.quantity}
+                    )
+
+    def stream(self) -> Generator[InteractionModel, None, None]:
+        yield from self.stream_clicks()
+        yield from self.stream_buys()
+
+    def _load_sessions(self, max_sessions: int | None = None) -> dict:
+        sessions = {}
+        for interaction in self.stream():
+            sid = interaction.value.payload.sessionId
+            if sid not in sessions:
+                if max_sessions and len(sessions) >= max_sessions: continue
+                sessions[sid] = []
+            sessions[sid].append(interaction)
+        for sid in sessions: sessions[sid].sort(key=lambda x: x.value.payload.ts)
+        return sessions
+
+    def get_data(self) -> dict:
+        return self.data
+
+    def get_entries(self) -> tuple[list[str], int]:
+        return self.entries, len(self.entries)
+
+if __name__ == "__main__":
+    loader = YooChooseLoader(max_sessions=100)
+    views, purchases = 0, 0
+    for sid, evts in loader.get_data().items():
+        for e in evts:
+            if e.value.payload.eventName == "view_item_page": views += 1
+            elif e.value.payload.eventName == "purchase_complete": purchases += 1
+    print(f"Loaded {len(loader.entries)} sessions: {views} view_item_page, {purchases} purchase_complete")

From 440371dba40e39ce5159cb1edc21a899fcd0740b Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 14:05:39 +0100
Subject: [PATCH 29/35] feat: initial feature engineering of trajectories

---
 sim/rl/behavior_loader/models.py | 49 +++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index 84c2fe4..4c6bf21 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -1,4 +1,7 @@
-from loader import Loader, AgentLoader, JointLoader
+try:
+    from loader import Loader, AgentLoader, JointLoader
+except ImportError:
+    from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader
 from collections import defaultdict
 from typing import Dict, List, Tuple, Set
 import numpy as np
@@ -80,6 +83,50 @@ class BehaviorModel:
             path.append(curr)
         return path
 
+    def extract_trajectory_features(self, events: List, max_trans_dim: int = 50) -> np.ndarray:
+        """Convert trajectory to feature vector using MDP structure for contrastive learning"""
+        if not self.mdp:
+            self.build_MDP()
+
+        states = [self._state_repr(e) for e in sorted(events, key=self._sort_key)]
+        features = []
+
+        # transition histogram over MDP state space
+        trans_counts = defaultdict(int)
+        for s, s_next in zip(states, states[1:]):
+            trans_counts[(s, s_next)] += 1
+        all_trans = [(s, t) for s in self.mdp['states'] for t in self.mdp['transitions'].get(s, {}).keys()]
+        trans_vec = [trans_counts.get(tr, 0) for tr in all_trans[:max_trans_dim]]
+        trans_vec = trans_vec + [0] * (max_trans_dim - len(trans_vec))  # pad
+        total_trans = sum(trans_counts.values()) or 1
+        features.extend([v / total_trans for v in trans_vec])
+
+        # state coverage ratio
+        visited = set(states)
+        features.append(len(visited) / max(self.mdp['num_states'], 1))
+
+        # temporal entropy of transitions
+        if len(states) > 1:
+            trans_probs = [self.transition_prob(s, s_n) for s, s_n in zip(states, states[1:])]
+            entropy = -sum(p * np.log(p + 1e-10) for p in trans_probs if p > 0)
+            features.append(entropy / max(len(states), 1))
+        else:
+            features.append(0.0)
+
+        # trajectory length and unique state count
+        features.append(len(states))
+        features.append(len(visited))
+
+        # state value statistics along trajectory
+        vals = [self.state_value(s) for s in states]
+        if vals:
+            features.extend([np.mean(vals), np.std(vals), np.min(vals), np.max(vals)])
+        else:
+            features.extend([0.0, 0.0, 0.0, 0.0])
+
+        return np.array(features, dtype=np.float32)
+
+
 class AgentBehaviorModel(BehaviorModel):
     def __init__(self, src_dir: str):
         super().__init__(src_dir, AgentLoader)

From 00e3eff2fadbc4b6153220971c68729464b8b46b Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 18:22:31 +0100
Subject: [PATCH 30/35] migrating weak learning

---
 experiments/ml/weak.train.py |  30 -----
 experiments/ml/weak_train.py | 246 +++++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+), 30 deletions(-)
 delete mode 100644 experiments/ml/weak.train.py
 create mode 100644 experiments/ml/weak_train.py

diff --git a/experiments/ml/weak.train.py b/experiments/ml/weak.train.py
deleted file mode 100644
index 36e11ee..0000000
--- a/experiments/ml/weak.train.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader
-from sim.rl.behavior_loader.loader import PayloadModel
-from arch import WeakClassifier
-
-agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
-human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
-
-def augment_trajectory(trajectory : list[PayloadModel], augmentation_rate: float = 0.1) -> list[PayloadModel]:
-    # augmentations possible:
-    # return a sub-trajectory window of the original trajectory
-    # insert random noise events
-    # shuffle a few events (find a few indices and swap them with i+1 neighbor)
-    # adjust metadata
-    return trajectory
-
-
-def train():
-    pass
-
-
-
-if __name__ == "__main__":
-    joint_loader = JointLoader(human_dir, agent_dir)
-    data = joint_loader.get_data()
-    entries, num_entries = joint_loader.get_entries()
-    print(f"Loaded {num_entries} entries")
-    # TODO: augment
-    # fit model
-    model = WeakClassifier()
-    model.fit(data)
diff --git a/experiments/ml/weak_train.py b/experiments/ml/weak_train.py
new file mode 100644
index 0000000..eb87a9c
--- /dev/null
+++ b/experiments/ml/weak_train.py
@@ -0,0 +1,246 @@
+import sys
+sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader")
+sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml")
+
+from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader, PayloadModel
+from sim.rl.behavior_loader.models import JointBehaviorModel
+from arch import ContrastiveWeakClassifier, contrastive_loss, featurize_trajectory
+from typing import List, Optional, Dict
+from datetime import datetime, timedelta
+from copy import deepcopy
+import numpy as np
+import random
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torch.optim import Adam
+from torch.utils.tensorboard import SummaryWriter
+
+RUNS_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml/runs"  # TensorBoard log root; NOTE(review): machine-specific absolute path
+agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"  # agent sessions, passed to JointLoader / JointBehaviorModel
+human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"  # human sessions, passed to JointLoader / JointBehaviorModel
+
+
+def _perturb_ts(evt: PayloadModel, jitter_ms: int = 500) -> PayloadModel:
+    """Return a deep copy of `evt` whose ISO-8601 `ts` is shifted by a uniform random offset in [-jitter_ms, jitter_ms] ms; the copy is left untouched when `ts` cannot be parsed."""
+    new_evt = deepcopy(evt)
+    try:
+        ts = datetime.fromisoformat(evt.ts.replace('Z', '+00:00'))
+        delta = timedelta(milliseconds=random.randint(-jitter_ms, jitter_ms))
+        new_evt.ts = (ts + delta).isoformat()
+    except (AttributeError, TypeError, ValueError, OverflowError):  # missing / non-string / unparseable ts: best-effort, keep the unmodified copy
+        pass
+    return new_evt
+
+
+def augment_trajectory(trajectory: List[PayloadModel], rate: float = 0.1) -> List[PayloadModel]:
+    """Return one randomly chosen augmented view (window, shuffle, noise or drop) of `trajectory` for contrastive learning; `rate` is the per-event swap/drop probability."""
+    if len(trajectory) < 2:  # nothing to window/swap/drop: the input list itself is returned
+        return trajectory
+
+    aug_type = random.choice(['window', 'shuffle', 'noise', 'drop'])  # exactly one augmentation per call, chosen uniformly
+
+    if aug_type == 'window':  # random contiguous sub-sequence (70-100% length, at least 2 events)
+        min_len = max(2, int(len(trajectory) * 0.7))
+        sub_len = random.randint(min_len, len(trajectory))
+        start = random.randint(0, len(trajectory) - sub_len)
+        return trajectory[start:start + sub_len]
+
+    elif aug_type == 'shuffle':  # swap adjacent pairs with probability rate (works on a shallow copy of the list)
+        result = list(trajectory)
+        for i in range(len(result) - 1):
+            if random.random() < rate:
+                result[i], result[i + 1] = result[i + 1], result[i]
+        return result
+
+    elif aug_type == 'drop':  # drop events with probability rate
+        result = [e for e in trajectory if random.random() > rate]
+        return result if len(result) >= 2 else trajectory[:2]  # never return fewer than two events
+
+    elif aug_type == 'noise':  # perturb timestamps with a fixed 500 ms jitter; `rate` is not used here
+        return [_perturb_ts(e, jitter_ms=500) for e in trajectory]
+
+    return trajectory  # fallback; every aug_type listed above already returns
+
+
+class TripletDataset(Dataset):
+    """Generate (anchor, positive, negative, label) samples on-the-fly: positive is an augmented view of the anchor, negative is a session of the other class"""
+    def __init__(self, data: Dict[str, List[PayloadModel]], mdp: Optional[Dict], augment_fn, input_dim: int = 64, multiplier: int = 10):
+        self.sessions = list(data.items())  # (session_id, events) pairs; list position is the session index
+        self.human_ids = [i for i, (sid, _) in enumerate(self.sessions) if sid.startswith('human_')]
+        self.agent_ids = [i for i, (sid, _) in enumerate(self.sessions) if sid.startswith('agent_')]
+        self.mdp = mdp
+        self.augment = augment_fn
+        self.input_dim = input_dim
+        self.multiplier = multiplier  # number of times each session serves as anchor per pass over the dataset
+
+        if not self.human_ids or not self.agent_ids:  # every anchor needs at least one negative from the other class
+            raise ValueError(f"Need both human ({len(self.human_ids)}) and agent ({len(self.agent_ids)}) sessions")
+
+    def __len__(self) -> int:
+        return len(self.sessions) * self.multiplier
+
+    def __getitem__(self, idx: int):
+        anchor_idx = idx % len(self.sessions)  # indices past the session count wrap around to revisit sessions
+        sid, events = self.sessions[anchor_idx]
+        is_human = sid.startswith('human_')  # NOTE(review): any id without the 'human_' prefix is treated as an agent session
+
+        anchor = featurize_trajectory(events, self.mdp, self.input_dim)
+        positive = featurize_trajectory(self.augment(events), self.mdp, self.input_dim)  # augmentation is re-sampled on every access
+
+        neg_pool = self.agent_ids if is_human else self.human_ids
+        neg_idx = random.choice(neg_pool)
+        negative = featurize_trajectory(self.sessions[neg_idx][1], self.mdp, self.input_dim)  # negatives are not augmented
+
+        label = 0 if is_human else 1  # 0=human, 1=agent
+        return (torch.tensor(anchor, dtype=torch.float32),
+                torch.tensor(positive, dtype=torch.float32),
+                torch.tensor(negative, dtype=torch.float32),
+                torch.tensor(label, dtype=torch.long))
+
+
+def train(epochs: int = 100, lr: float = 1e-3, batch_size: int = 4, input_dim: int = 64,
+          embed_dim: int = 32, margin: float = 0.3, verbose: bool = True, run_name: str = None):
+    """Train the contrastive weak classifier on human/agent trajectories with triplet + 0.5 * cross-entropy loss, logging the epoch loss to TensorBoard; returns (model, ref_mdp)."""
+    joint = JointLoader(human_dir, agent_dir)
+    data = joint.get_data()
+    if verbose:
+        print(f"Loaded {len(data)} sessions")
+
+    joint_model = JointBehaviorModel(human_dir, agent_dir)
+    ref_mdp = joint_model.build_MDP()  # reference MDP handed to featurize_trajectory through the dataset
+
+    dataset = TripletDataset(data, ref_mdp, augment_trajectory, input_dim=input_dim)
+    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)  # drop_last: an incomplete final batch is discarded
+
+    model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
+    model.to_device()
+
+    run_name = run_name or f"d{input_dim}_e{embed_dim}_lr{lr}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"  # default name encodes hyper-parameters + timestamp
+    writer = SummaryWriter(f"{RUNS_DIR}/train/{run_name}")
+
+    optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)  # encoder and linear head are optimised jointly
+    ce_loss_fn = torch.nn.CrossEntropyLoss()
+
+    best_loss = float('inf')
+    for epoch in range(epochs):
+        model.encoder.train()
+        model.classifier.train()
+        total_loss, n_batches = 0.0, 0
+
+        for anchor, positive, negative, labels in loader:
+            anchor, positive, negative, labels = [t.to(model.device) for t in [anchor, positive, negative, labels]]
+            z_a, z_p, z_n = [model.encoder(t.unsqueeze(1)) for t in [anchor, positive, negative]]  # unsqueeze(1): each feature vector is fed as a length-1 sequence
+
+            trip_loss = contrastive_loss(z_a, z_p, z_n, margin=model.margin)
+            ce = ce_loss_fn(model.classifier(z_a), labels)  # supervised term on the anchor embedding only
+            loss = trip_loss + 0.5 * ce
+
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item()
+            n_batches += 1
+
+        avg_loss = total_loss / max(n_batches, 1)  # max(): avoids division by zero when the loader yields no batch
+        writer.add_scalar('loss', avg_loss, epoch)
+
+        if verbose and (epoch + 1) % 10 == 0:
+            print(f"Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")
+        if avg_loss < best_loss:  # tracked for the final report only; no checkpoint is written here
+            best_loss = avg_loss
+
+    writer.close()
+    if verbose:
+        print(f"Done. Best={best_loss:.4f} TB:{RUNS_DIR}/train/{run_name}")
+
+    return model, ref_mdp
+
+
+def evaluate_loocv(input_dim: int = 64, embed_dim: int = 32, epochs_per_fold: int = 50,
+                   lr: float = 1e-3, margin: float = 0.3, run_name: str = None):
+    """Leave-one-out cross-validation given limited samples: train a fresh model per held-out session; returns (accuracy, predictions, actuals), or (0.0, [], []) if no fold produced a prediction."""
+    joint = JointLoader(human_dir, agent_dir)
+    data = joint.get_data()
+    session_ids = list(data.keys())
+
+    joint_model = JointBehaviorModel(human_dir, agent_dir)
+    ref_mdp = joint_model.build_MDP()  # NOTE(review): built once from both data dirs before any split, so it presumably includes the held-out session - confirm
+
+    run_name = run_name or f"loocv_d{input_dim}_e{embed_dim}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
+    writer = SummaryWriter(f"{RUNS_DIR}/eval/{run_name}")
+
+    predictions, actuals = [], []
+
+    for fold_idx, test_sid in enumerate(session_ids):
+        train_data = {k: v for k, v in data.items() if k != test_sid}
+        test_events = data[test_sid]
+        test_label = 0 if test_sid.startswith('human_') else 1  # 0=human, 1=agent (same convention as TripletDataset)
+
+        n_human = sum(1 for k in train_data if k.startswith('human_'))
+        n_agent = sum(1 for k in train_data if k.startswith('agent_'))
+        if n_human == 0 or n_agent == 0:  # holding this session out leaves one class empty: skip the fold
+            continue
+
+        try:
+            dataset = TripletDataset(train_data, ref_mdp, augment_trajectory, input_dim=input_dim, multiplier=5)
+            loader = DataLoader(dataset, batch_size=2, shuffle=True, drop_last=True)
+
+            model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
+            model.to_device()
+            optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)
+
+            model.encoder.train()
+            model.classifier.train()
+            for _ in range(epochs_per_fold):
+                for anchor, positive, negative, labels in loader:
+                    z_a, z_p, z_n = [model.encoder(t.unsqueeze(1).to(model.device)) for t in [anchor, positive, negative]]
+                    loss = contrastive_loss(z_a, z_p, z_n, margin=margin) + 0.5 * torch.nn.functional.cross_entropy(model.classifier(z_a), labels.to(model.device))  # same objective as train(); with the triplet term alone the classifier head used by predict() is never updated
+                    optimizer.zero_grad()
+                    loss.backward()
+                    optimizer.step()
+
+            test_feat = featurize_trajectory(test_events, ref_mdp, input_dim)
+            pred = model.predict(test_feat.reshape(1, -1))[0]  # predict() switches encoder/head to eval mode
+            predictions.append(pred)
+            actuals.append(test_label)
+            print(f"  {test_sid[:12]}...: pred={pred}, actual={test_label}, {'OK' if pred == test_label else 'MISS'}")
+
+        except Exception as e:  # a failing fold is reported and excluded from the metrics rather than aborting the whole run
+            print(f"Error: {e}")
+
+    if predictions:
+        acc = sum(p == a for p, a in zip(predictions, actuals)) / len(predictions)
+        tp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 1)  # positive class = agent (label 1)
+        fp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 0)
+        fn = sum(1 for p, a in zip(predictions, actuals) if p == 0 and a == 1)
+        prec, rec = tp / max(tp + fp, 1), tp / max(tp + fn, 1)  # max(..., 1): 0.0 instead of ZeroDivisionError when a denominator is empty
+        f1 = 2 * prec * rec / max(prec + rec, 1e-10)
+        writer.add_scalar('accuracy', acc, 0)
+        writer.add_scalar('f1', f1, 0)
+        writer.add_scalar('precision', prec, 0)
+        writer.add_scalar('recall', rec, 0)
+        writer.close()
+        print(f"\nAccuracy: {acc:.2%} F1: {f1:.3f} TB:{RUNS_DIR}/eval/{run_name}")
+        return acc, predictions, actuals
+    writer.close()
+    return 0.0, [], []
+
+
+if __name__ == "__main__":  # CLI entry point: --mode train (default) or --mode eval (leave-one-out CV)
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--mode', choices=['train', 'eval'], default='train')
+    parser.add_argument('--epochs', type=int, default=100)  # in eval mode this value is passed as epochs_per_fold
+    parser.add_argument('--lr', type=float, default=1e-3)
+    parser.add_argument('--margin', type=float, default=0.3)
+    parser.add_argument('--input-dim', type=int, default=64)
+    parser.add_argument('--embed-dim', type=int, default=32)
+    parser.add_argument('--run-name', type=str, default=None)  # None lets train()/evaluate_loocv() generate a timestamped name
+    args = parser.parse_args()
+
+    if args.mode == 'train':
+        model, mdp = train(epochs=args.epochs, lr=args.lr, input_dim=args.input_dim,
+                           embed_dim=args.embed_dim, margin=args.margin, run_name=args.run_name)
+    else:
+        evaluate_loocv(input_dim=args.input_dim, embed_dim=args.embed_dim, epochs_per_fold=args.epochs,
+                       lr=args.lr, margin=args.margin, run_name=args.run_name)

From ccc19f349385511e3b0e9f0cb3a9290c11095bb3 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 18:22:39 +0100
Subject: [PATCH 31/35] adapting some architectures

---
 experiments/ml/__init__.py |  16 ++-
 experiments/ml/arch.py     | 242 +++++++++++++++++++++++++++++++++++--
 2 files changed, 247 insertions(+), 11 deletions(-)

diff --git a/experiments/ml/__init__.py b/experiments/ml/__init__.py
index 11b65df..c97eaa9 100644
--- a/experiments/ml/__init__.py
+++ b/experiments/ml/__init__.py
@@ -1,11 +1,21 @@
 from .evals import evaluate
 from .arch import (
     XGBoostAgentClassifier,
-    LightGBMAgentClassifier
+    LightGBMAgentClassifier,
+    ContrastiveWeakClassifier,
+    TrajectoryEncoder,
+    WeakClassifier,
+    contrastive_loss,
+    featurize_trajectory,
 )
 
-__all__ =[
+__all__ = [
     'evaluate',
     'XGBoostAgentClassifier',
-    'LightGBMAgentClassifier'
+    'LightGBMAgentClassifier',
+    'ContrastiveWeakClassifier',
+    'TrajectoryEncoder',
+    'WeakClassifier',
+    'contrastive_loss',
+    'featurize_trajectory',
 ]
diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py
index a187959..4ceb2e0 100644
--- a/experiments/ml/arch.py
+++ b/experiments/ml/arch.py
@@ -1,23 +1,249 @@
 # sklearn compatible models for agent detection
 from sklearn.base import BaseEstimator, ClassifierMixin
-from procesing.context import PipelineContext
-from typing import Any, Optional, Tuple
+from typing import Any, Optional, Tuple, Dict, List
 from abc import ABC, abstractmethod
-import xgboost as xgb
-import lightgbm as lgb
+from collections import defaultdict
 import numpy as np
 import pandas as pd
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
 
 TASK = 'classification'
 LABELS = ['human', 'agent']
 
 
 class WeakClassifier(BaseEstimator, ClassifierMixin, ABC):
-    # a simple contrastive machine learning model
-    # this model should learn to distinguish between human and agent behavior
-    # using a weakly supervised approach and contrastive learning + augmentation
-    #
+    # a simple contrastive machine learning model learns to distinguish human/agent behavior
+    # using weakly supervised contrastive learning + augmentation
     def __init__(self, **kwargs):
         super().__init__()
         self.model = None
         self.kwargs = kwargs
+
+
+class TrajectoryEncoder(nn.Module):
+    """Encode variable-length event sequences to a fixed-dim, L2-normalized embedding via a bidirectional LSTM"""
+    def __init__(self, input_dim: int, embed_dim: int = 32, hidden_dim: int = 64):
+        super().__init__()
+        self.event_embed = nn.Linear(input_dim, hidden_dim)  # per-step projection applied before the LSTM
+        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
+        self.proj = nn.Linear(hidden_dim * 2, embed_dim)  # 2x: forward and backward final states are concatenated
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (batch, seq_len, input_dim) -> (batch, embed_dim)
+        h = F.relu(self.event_embed(x))
+        _, (hn, _) = self.lstm(h)  # per-step outputs and cell state are discarded; only final hidden states are used
+        hn = torch.cat([hn[-2], hn[-1]], dim=1)  # concat bidirectional hidden states -> (batch, 2 * hidden_dim)
+        return F.normalize(self.proj(hn), dim=1)  # L2 normalized
+
+
+class ContrastiveWeakClassifier(WeakClassifier):
+    """Contrastive learning classifier for human/agent trajectory discrimination: a TrajectoryEncoder followed by a 2-way linear head"""
+    def __init__(self, input_dim: int = 64, embed_dim: int = 32, margin: float = 1.0, **kwargs):
+        super().__init__(**kwargs)
+        self.input_dim = input_dim
+        self.embed_dim = embed_dim
+        self.margin = margin  # stored only; no method of this class reads it
+        self.encoder = TrajectoryEncoder(input_dim, embed_dim)
+        self.classifier = nn.Linear(embed_dim, 2)  # two logits, one per class
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self._fitted = False
+
+    def to_device(self):  # moves encoder and head to self.device; __init__ does not do this, so call it explicitly
+        self.encoder.to(self.device)
+        self.classifier.to(self.device)
+        return self
+
+    def encode(self, x: torch.Tensor) -> torch.Tensor:
+        return self.encoder(x.to(self.device))
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # returns raw logits
+        emb = self.encode(x)
+        return self.classifier(emb)
+
+    def fit(self, X, y=None):  # sklearn interface - actual training in weak_train.py; this only sets the fitted flag
+        self._fitted = True
+        return self
+
+    def predict(self, X: np.ndarray) -> np.ndarray:  # returns the argmax class index per row
+        self.encoder.eval()
+        self.classifier.eval()
+        with torch.no_grad():
+            x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device)  # assumes X is (n_samples, input_dim); each row becomes a length-1 sequence
+            logits = self.forward(x)
+            return torch.argmax(logits, dim=1).cpu().numpy()
+
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:  # returns softmax probabilities, one column per class
+        self.encoder.eval()
+        self.classifier.eval()
+        with torch.no_grad():
+            x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device)  # same length-1 sequence reshaping as predict()
+            logits = self.forward(x)
+            return F.softmax(logits, dim=1).cpu().numpy()
+
+
+def contrastive_loss(anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor, margin: float = 0.3) -> torch.Tensor:
+    """Triplet loss using cosine similarity (for L2-normalized embeddings), averaged over the batch. margin in [0,1] range."""
+    pos_sim = F.cosine_similarity(anchor, positive)  # higher = more similar
+    neg_sim = F.cosine_similarity(anchor, negative)
+    return F.relu(neg_sim - pos_sim + margin).mean()  # zero once pos_sim >= neg_sim + margin, positive otherwise
+
+
+def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) -> torch.Tensor:
+    """Normalized temperature-scaled cross entropy loss (SimCLR style); row k of z_i and row k of z_j form a positive pair"""
+    batch_size = z_i.size(0)
+    z = torch.cat([z_i, z_j], dim=0)  # (2N, embed_dim)
+    sim = F.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0), dim=2) / temperature  # pairwise similarities, (2N, 2N)
+    mask = torch.eye(2 * batch_size, dtype=torch.bool, device=z.device)
+    sim.masked_fill_(mask, -float('inf'))  # exclude each sample's similarity with itself from the softmax
+    labels = torch.arange(batch_size, device=z.device)
+    labels = torch.cat([labels + batch_size, labels])  # positive pairs: row k -> column k+N, row k+N -> column k
+    return F.cross_entropy(sim, labels)
+
+
+# feature extraction utilities for trajectory -> feature vector
+def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray:
+    """Return a float32 vector of length `max_states` holding the relative frequency of each distinct (state, next_state) transition, zero-padded/truncated."""
+    if len(events) < 2:
+        return np.zeros(max_states, dtype=np.float32)  # no transitions; float32 to match the dtype of the main path
+    states = [state_fn(e) for e in events]
+    trans_counts = defaultdict(int)
+    for s, s_next in zip(states, states[1:]):
+        trans_counts[(s, s_next)] += 1
+    total = sum(trans_counts.values())  # counts every transition, including those truncated from the vector below
+    hist = np.array(list(trans_counts.values())[:max_states], dtype=np.float32)  # NOTE(review): bins follow first-occurrence order within this trajectory, so bin k is not the same transition across trajectories
+    hist = np.pad(hist, (0, max(0, max_states - len(hist))))
+    return hist / (total + 1e-10)
+
+
+def temporal_signature(events: List, ts_fn) -> np.ndarray:
+    """Extract temporal features as a float32 vector [mean, std, skew, count] of inter-event times (in the unit returned by `ts_fn`)"""
+    if len(events) < 2:
+        return np.zeros(4, dtype=np.float32)
+    times = sorted([ts_fn(e) for e in events])  # sorted, so the gaps below are never negative even if events arrive out of order
+    diffs = np.diff(times).astype(np.float32)
+    if len(diffs) == 0:  # defensive only: with at least two events np.diff always yields at least one gap
+        return np.zeros(4, dtype=np.float32)
+    mean_dt, std_dt = np.mean(diffs), np.std(diffs) + 1e-10  # the epsilon is part of the reported std and keeps the division below finite
+    skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0  # skew reported as 0 for (near-)constant gaps
+    return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32)
+
+
+def state_coverage(events: List, state_fn, mdp_states: set) -> float:
+    """Fraction of MDP states visited by trajectory; 0.0 when `mdp_states` is empty"""
+    if not mdp_states:  # also avoids dividing by zero below
+        return 0.0
+    visited = set(state_fn(e) for e in events)
+    return len(visited & mdp_states) / len(mdp_states)  # visited states that are not in mdp_states are ignored
+
+
+def transition_entropy(events: List, state_fn) -> float:
+    """Compute entropy (natural log) of the empirical distribution over distinct (state, next_state) transitions; 0.0 for fewer than two events"""
+    if len(events) < 2:
+        return 0.0
+    states = [state_fn(e) for e in events]
+    trans_counts = defaultdict(int)
+    for s, s_next in zip(states, states[1:]):
+        trans_counts[(s, s_next)] += 1
+    total = sum(trans_counts.values())
+    probs = [c / total for c in trans_counts.values()]
+    return -sum(p * np.log(p + 1e-10) for p in probs)  # the epsilon only guards the log; every p here is already > 0
+
+
+def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray:
+    """Convert a trajectory to a float32 feature vector of length `input_dim` (52 features, zero-padded or truncated to fit)"""
+    def _state_repr(e):
+        return f"{getattr(e, 'page', None) or 'unk'}|{getattr(e, 'productId', None) or 'none'}|{e.eventName}"
+
+    def _ts_fn(e):
+        ts = getattr(e, 'ts', None)
+        if isinstance(ts, str):
+            from datetime import datetime
+            try:
+                return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp()
+            except ValueError:  # unparseable timestamp string: fall back to 0.0 without masking unrelated errors
+                return 0.0
+        return float(ts) if ts else 0.0  # numeric timestamps pass through; missing/None/0 become 0.0
+
+    feats = []
+    feats.extend(transition_histogram(events, _state_repr, max_states=40))  # 40 dims
+    feats.extend(temporal_signature(events, _ts_fn))  # 4 dims
+    mdp_states = set(mdp.get('states', [])) if mdp else set()  # without an MDP (or a 'states' key) coverage is 0.0
+    feats.append(state_coverage(events, _state_repr, mdp_states))  # 1 dim
+    feats.append(transition_entropy(events, _state_repr))  # 1 dim
+    feats.append(len(events))  # trajectory length
+    feats.append(len(set(_state_repr(e) for e in events)))  # unique states
+
+    # event type distribution (page_view, hover, cart, purchase indicators); the +1 keeps the ratio defined for empty trajectories
+    event_names = [e.eventName for e in events]
+    feats.append(sum(1 for n in event_names if 'page' in n.lower()) / (len(events) + 1))
+    feats.append(sum(1 for n in event_names if 'hover' in n.lower()) / (len(events) + 1))
+    feats.append(sum(1 for n in event_names if 'cart' in n.lower()) / (len(events) + 1))
+    feats.append(sum(1 for n in event_names if 'purchase' in n.lower() or 'checkout' in n.lower()) / (len(events) + 1))
+
+    # pad/truncate to input_dim (an input_dim below 52 silently drops the trailing features)
+    feats = np.array(feats[:input_dim], dtype=np.float32)
+    if len(feats) < input_dim:
+        feats = np.pad(feats, (0, input_dim - len(feats)))
+    return feats
+
+
+# gradient boosting classifiers for comparison baselines
+class XGBoostAgentClassifier(BaseEstimator, ClassifierMixin):
+    """XGBoost classifier for human/agent detection from session features (sklearn-style fit / predict / predict_proba wrapper)"""
+    def __init__(self, n_estimators: int = 100, max_depth: int = 6, learning_rate: float = 0.1, **kwargs):
+        self.n_estimators = n_estimators
+        self.max_depth = max_depth
+        self.learning_rate = learning_rate
+        self.model = None  # set by fit(); predict()/predict_proba() raise ValueError while it is None
+        self.kwargs = kwargs  # extra keyword arguments forwarded to xgb.XGBClassifier
+
+    def fit(self, X: np.ndarray, y: np.ndarray):
+        try:
+            import xgboost as xgb  # imported lazily so this module loads without xgboost installed
+        except ImportError as err:
+            raise ImportError("xgboost required for XGBoostAgentClassifier") from err
+        self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
+                                       learning_rate=self.learning_rate, **self.kwargs)
+        self.model.fit(X, y)  # outside the try: an ImportError raised while fitting is no longer relabelled as "xgboost required"
+        return self
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        if self.model is None:
+            raise ValueError("fit the model first")
+        return self.model.predict(X)
+
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:
+        if self.model is None:
+            raise ValueError("fit the model first")
+        return self.model.predict_proba(X)
+
+
+class LightGBMAgentClassifier(BaseEstimator, ClassifierMixin):
+    """LightGBM classifier for human/agent detection from session features (sklearn-style fit / predict / predict_proba wrapper)"""
+    def __init__(self, n_estimators: int = 100, max_depth: int = -1, learning_rate: float = 0.1, **kwargs):
+        self.n_estimators = n_estimators
+        self.max_depth = max_depth
+        self.learning_rate = learning_rate
+        self.model = None  # set by fit(); predict()/predict_proba() raise ValueError while it is None
+        self.kwargs = kwargs  # extra keyword arguments forwarded to lgb.LGBMClassifier
+
+    def fit(self, X: np.ndarray, y: np.ndarray):
+        try:
+            import lightgbm as lgb  # imported lazily so this module loads without lightgbm installed
+        except ImportError as err:
+            raise ImportError("lightgbm required for LightGBMAgentClassifier") from err
+        self.model = lgb.LGBMClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
+                                        learning_rate=self.learning_rate, verbose=-1, **self.kwargs)
+        self.model.fit(X, y)  # outside the try: an ImportError raised while fitting is no longer relabelled as "lightgbm required"
+        return self
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        if self.model is None:
+            raise ValueError("fit the model first")
+        return self.model.predict(X)
+
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:
+        if self.model is None:
+            raise ValueError("fit the model first")
+        return self.model.predict_proba(X)

From 22a2c255bd23f8717275fe99a34a65253deed3c8 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 19:11:54 +0100
Subject: [PATCH 32/35] chore: remove boilerplate

---
 web/src/app/page.tsx | 64 ++------------------------------------------
 1 file changed, 2 insertions(+), 62 deletions(-)

diff --git a/web/src/app/page.tsx b/web/src/app/page.tsx
index 295f8fd..c97c8ed 100644
--- a/web/src/app/page.tsx
+++ b/web/src/app/page.tsx
@@ -1,65 +1,5 @@
-import Image from "next/image";
+import { redirect } from 'next/navigation';
 
 export default function Home() {
-  return (
-    <div className="flex min-h-screen items-center justify-center bg-zinc-50 font-sans dark:bg-black">
-      <main className="flex min-h-screen w-full max-w-3xl flex-col items-center justify-between py-32 px-16 bg-white dark:bg-black sm:items-start">
-        <Image
-          className="dark:invert"
-          src="/next.svg"
-          alt="Next.js logo"
-          width={100}
-          height={20}
-          priority
-        />
-        <div className="flex flex-col items-center gap-6 text-center sm:items-start sm:text-left">
-          <h1 className="max-w-xs text-3xl font-semibold leading-10 tracking-tight text-black dark:text-zinc-50">
-            To get started, edit the page.tsx file.
-          </h1>
-          <p className="max-w-md text-lg leading-8 text-zinc-600 dark:text-zinc-400">
-            Looking for a starting point or more instructions? Head over to{" "}
-            <a
-              href="https://vercel.com/templates?framework=next.js&utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
-              className="font-medium text-zinc-950 dark:text-zinc-50"
-            >
-              Templates
-            </a>{" "}
-            or the{" "}
-            <a
-              href="https://nextjs.org/learn?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
-              className="font-medium text-zinc-950 dark:text-zinc-50"
-            >
-              Learning
-            </a>{" "}
-            center.
-          </p>
-        </div>
-        <div className="flex flex-col gap-4 text-base font-medium sm:flex-row">
-          <a
-            className="flex h-12 w-full items-center justify-center gap-2 rounded-full bg-foreground px-5 text-background transition-colors hover:bg-[#383838] dark:hover:bg-[#ccc] md:w-[158px]"
-            href="https://vercel.com/new?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
-            target="_blank"
-            rel="noopener noreferrer"
-          >
-            <Image
-              className="dark:invert"
-              src="/vercel.svg"
-              alt="Vercel logomark"
-              width={16}
-              height={16}
-            />
-            Deploy Now
-          </a>
-          <a
-            className="flex h-12 w-full items-center justify-center rounded-full border border-solid border-black/[.08] px-5 transition-colors hover:border-transparent hover:bg-black/[.04] dark:border-white/[.145] dark:hover:bg-[#1a1a1a] md:w-[158px]"
-            href="https://nextjs.org/docs?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
-            target="_blank"
-            rel="noopener noreferrer"
-          >
-            Documentation
-          </a>
-        </div>
-      </main>
-    </div>
-  );
+  redirect('/hotel');
 }

From ee70f02a1f2feae8c52f02aeb8d61837a0ad1787 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 19:12:11 +0100
Subject: [PATCH 33/35] chore: export repeated methods into lib

---
 lib/__init__.py     |  41 +++++++++++++++
 lib/config.py       |  65 +++++++++++++++++++++++
 lib/features.py     | 125 ++++++++++++++++++++++++++++++++++++++++++++
 lib/kafka_client.py |  54 +++++++++++++++++++
 lib/state.py        |  72 +++++++++++++++++++++++++
 5 files changed, 357 insertions(+)
 create mode 100644 lib/__init__.py
 create mode 100644 lib/config.py
 create mode 100644 lib/features.py
 create mode 100755 lib/kafka_client.py
 create mode 100644 lib/state.py

diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..7f8ec2d
--- /dev/null
+++ b/lib/__init__.py
@@ -0,0 +1,41 @@
+"""PHANTOM shared library
+Re-exports the shared config (paths, service settings, model-registry directory), state and feature utilities; lib/kafka_client.py is not re-exported here
+"""
+from .config import (
+    PROJECT_ROOT, DATA_DIR, EXPERIMENTS_DIR,
+    AGENT_DATA_DIR, HUMAN_DATA_DIR, SIM_RUNS_DIR, MODEL_REGISTRY_DIR,
+    COLLECTED_DATA_DIR, NOTEBOOK_OUTPUT_DIR,
+    ensure_dir, get_data_path, get_experiments_path, get_sim_path,
+    KAFKA_HOST, KAFKA_PORT, KAFKA_BROKER,
+    REDIS_HOST, REDIS_PORT,
+    SUPABASE_URL, SUPABASE_ANON_KEY,
+    BACKEND_PORT, PROVIDER_PORT
+)
+from .state import (
+    make_state_repr, event_to_state, parse_state,
+    get_event_name, get_timestamp,
+    create_state_fn, create_event_name_fn, create_timestamp_fn
+)
+from .features import (
+    transition_histogram, temporal_signature, state_coverage, transition_entropy,
+    event_type_distribution, featurize_trajectory, parse_timestamp
+)
+
+__all__ = [
+    # config
+    'PROJECT_ROOT', 'DATA_DIR', 'EXPERIMENTS_DIR',
+    'AGENT_DATA_DIR', 'HUMAN_DATA_DIR', 'SIM_RUNS_DIR', 'MODEL_REGISTRY_DIR',
+    'COLLECTED_DATA_DIR', 'NOTEBOOK_OUTPUT_DIR',
+    'ensure_dir', 'get_data_path', 'get_experiments_path', 'get_sim_path',
+    'KAFKA_HOST', 'KAFKA_PORT', 'KAFKA_BROKER',
+    'REDIS_HOST', 'REDIS_PORT',
+    'SUPABASE_URL', 'SUPABASE_ANON_KEY',
+    'BACKEND_PORT', 'PROVIDER_PORT',
+    # state
+    'make_state_repr', 'event_to_state', 'parse_state',
+    'get_event_name', 'get_timestamp',
+    'create_state_fn', 'create_event_name_fn', 'create_timestamp_fn',
+    # features
+    'transition_histogram', 'temporal_signature', 'state_coverage', 'transition_entropy',
+    'event_type_distribution', 'featurize_trajectory', 'parse_timestamp',
+]
diff --git a/lib/config.py b/lib/config.py
new file mode 100644
index 0000000..a27ffd9
--- /dev/null
+++ b/lib/config.py
@@ -0,0 +1,65 @@
+"""Unified path configuration for PHANTOM project
+All hardcoded paths should reference this module
+Paths can be overridden via environment variables
+"""
+import os
+from pathlib import Path
+
+# project root (directory containing lib/, experiments/, sim/, web/, backend/)
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+
+# data directories
+DATA_DIR = Path(os.getenv('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data'))
+EXPERIMENTS_DIR = Path(os.getenv('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments'))
+
+# agent/human interaction data
+AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', DATA_DIR / 'agents'))
+HUMAN_DATA_DIR = Path(os.getenv('PHANTOM_HUMAN_DATA_DIR', DATA_DIR / 'humans'))
+
+# RL simulation runs
+SIM_RUNS_DIR = Path(os.getenv('PHANTOM_SIM_RUNS_DIR', PROJECT_ROOT / 'sim' / 'rl' / 'runs'))
+
+# model artifacts
+MODEL_REGISTRY_DIR = Path(os.getenv('PHANTOM_MODEL_REGISTRY_DIR', DATA_DIR / 'models'))
+
+# collected experiment data
+COLLECTED_DATA_DIR = Path(os.getenv('PHANTOM_COLLECTED_DATA_DIR', EXPERIMENTS_DIR / 'agents' / 'collected_data'))
+
+# notebook outputs
+NOTEBOOK_OUTPUT_DIR = Path(os.getenv('PHANTOM_NOTEBOOK_OUTPUT_DIR', EXPERIMENTS_DIR / 'notebooks' / 'outputs'))
+
+
+def ensure_dir(path: Path) -> Path:
+    """create `path` and any missing parents (no error if the directory already exists); returns `path` for chaining"""
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def get_data_path(*parts: str) -> Path:
+    """join `parts` onto DATA_DIR and return the Path (pure path arithmetic, nothing is created on disk)"""
+    return DATA_DIR.joinpath(*parts)
+
+
+def get_experiments_path(*parts: str) -> Path:
+    """join `parts` onto EXPERIMENTS_DIR and return the Path (pure path arithmetic, nothing is created on disk)"""
+    return EXPERIMENTS_DIR.joinpath(*parts)
+
+
+def get_sim_path(*parts: str) -> Path:
+    """join `parts` onto SIM_RUNS_DIR and return the Path (pure path arithmetic, nothing is created on disk)"""
+    return SIM_RUNS_DIR.joinpath(*parts)
+
+
+# service configuration, read from the process environment at import time (this module does not load .env itself)
+KAFKA_HOST = os.getenv('KAFKA_HOST', 'localhost')
+KAFKA_PORT = os.getenv('KAFKA_PORT', '9092')  # kept as str (unlike the int ports below); interpolated into KAFKA_BROKER
+KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}"
+
+REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
+REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))
+
+SUPABASE_URL = os.getenv('NEXT_PUBLIC_SUPABASE_URL', '')
+SUPABASE_ANON_KEY = os.getenv('NEXT_PUBLIC_SUPABASE_ANON_KEY', '')
+
+BACKEND_PORT = int(os.getenv('BACKEND_PORT', '5000'))
+PROVIDER_PORT = int(os.getenv('PROVIDER_PORT', '5001'))
diff --git a/lib/features.py b/lib/features.py
new file mode 100644
index 0000000..f2d88f5
--- /dev/null
+++ b/lib/features.py
@@ -0,0 +1,125 @@
+"""Unified featurization utilities for trajectory -> feature vector conversion
+Used by both experiments/ml/ and sim/rl/ components
+"""
+import numpy as np
+from collections import defaultdict
+from typing import List, Dict, Callable, Optional, Any, Set
+from datetime import datetime
+
+
+def transition_histogram(events: List, state_fn: Callable, max_states: int = 50) -> np.ndarray:
+    """compute normalized histogram of state transitions in trajectory
+    events: list of event objects/dicts; state_fn: function mapping event -> state string
+    max_states: output length (transitions beyond it are dropped, unused bins stay zero)
+    note: bins follow first-seen order of (s, s_next) pairs, so bin i is not a fixed transition across trajectories
+    """
+    if len(events) < 2:
+        return np.zeros(max_states, dtype=np.float32)
+    states = [state_fn(e) for e in events]
+    trans_counts = defaultdict(int)
+    for s, s_next in zip(states, states[1:]):
+        trans_counts[(s, s_next)] += 1
+    total = sum(trans_counts.values())  # counts every transition, including ones truncated below
+    hist = np.array(list(trans_counts.values())[:max_states], dtype=np.float32)
+    hist = np.pad(hist, (0, max(0, max_states - len(hist))))
+    return hist / (total + 1e-10)
+
+
+def temporal_signature(events: List, ts_fn: Callable) -> np.ndarray:
+    """extract temporal features: mean/std/skew of inter-event times plus count
+    events: list of event objects/dicts (timestamps are sorted first, so input order does not matter)
+    ts_fn: function mapping event -> timestamp (float seconds)
+    returns: float32 [mean_dt, std_dt, skew, n_intervals] array; all zeros for fewer than 2 events
+    """
+    if len(events) < 2:
+        return np.zeros(4, dtype=np.float32)
+    times = sorted([ts_fn(e) for e in events])
+    diffs = np.diff(times).astype(np.float32)
+    if len(diffs) == 0:  # defensive only: 2+ events always yield at least one interval
+        return np.zeros(4, dtype=np.float32)
+    mean_dt, std_dt = np.mean(diffs), np.std(diffs) + 1e-10  # epsilon keeps the division below finite
+    skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0  # skew reported as 0 for near-constant gaps
+    return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32)
+
+
+def state_coverage(events: List, state_fn: Callable, mdp_states: Set[str]) -> float:
+    """fraction of MDP states visited by trajectory (value in [0, 1])
+    events: list of event objects/dicts
+    state_fn: function mapping event -> state string
+    mdp_states: set of all possible MDP states; empty -> 0.0, visited states outside the set are ignored
+    """
+    if not mdp_states:
+        return 0.0
+    visited = set(state_fn(e) for e in events)
+    return len(visited & mdp_states) / len(mdp_states)
+
+
+def transition_entropy(events: List, state_fn: Callable) -> float:
+    """compute Shannon entropy (natural log) of the empirical (s, s_next) transition distribution
+    higher entropy = more random browsing pattern; returns 0.0 for fewer than 2 events
+    """
+    if len(events) < 2:
+        return 0.0
+    states = [state_fn(e) for e in events]
+    trans_counts = defaultdict(int)
+    for s, s_next in zip(states, states[1:]):
+        trans_counts[(s, s_next)] += 1
+    total = sum(trans_counts.values())
+    probs = [c / total for c in trans_counts.values()]
+    return -sum(p * np.log(p + 1e-10) for p in probs)  # 1e-10 guards log(0); every p is > 0 here
+
+
+def event_type_distribution(events: List, event_name_fn: Callable) -> np.ndarray:
+    """compute share of events whose lowercased name contains each category keyword (substring match)
+    returns: float32 [page_view_ratio, hover_ratio, cart_ratio, purchase_ratio]; categories can overlap, so no sum-to-1 guarantee
+    """
+    if not events:
+        return np.zeros(4, dtype=np.float32)
+    n = len(events)
+    names = [event_name_fn(e).lower() for e in events]
+    return np.array([
+        sum(1 for nm in names if 'page' in nm or 'view' in nm) / n,
+        sum(1 for nm in names if 'hover' in nm) / n,
+        sum(1 for nm in names if 'cart' in nm) / n,
+        sum(1 for nm in names if 'purchase' in nm or 'checkout' in nm) / n
+    ], dtype=np.float32)
+
+
+def featurize_trajectory(events: List, state_fn: Callable, ts_fn: Callable,
+                         event_name_fn: Callable, mdp_states: Optional[Set[str]] = None,
+                         output_dim: int = 64) -> np.ndarray:
+    """convert trajectory to fixed-dimension feature vector
+    events: list of event objects/dicts
+    state_fn: function mapping event -> state string
+    ts_fn: function mapping event -> timestamp (float)
+    event_name_fn: function mapping event -> event name string
+    mdp_states: optional set of all MDP states for coverage calculation (None -> coverage feature is 0.0)
+    output_dim: desired output dimension; 52 features are built, then zero-padded or truncated to this length
+    """
+    feats = []
+    feats.extend(transition_histogram(events, state_fn, max_states=40))  # 40 dims
+    feats.extend(temporal_signature(events, ts_fn))  # 4 dims
+    feats.append(state_coverage(events, state_fn, mdp_states or set()))  # 1 dim
+    feats.append(transition_entropy(events, state_fn))  # 1 dim
+    feats.append(float(len(events)))  # trajectory length
+    feats.append(float(len(set(state_fn(e) for e in events))))  # unique states
+    feats.extend(event_type_distribution(events, event_name_fn))  # 4 dims
+
+    feats = np.array(feats[:output_dim], dtype=np.float32)  # truncate first, pad below if still short
+    if len(feats) < output_dim:
+        feats = np.pad(feats, (0, output_dim - len(feats)))
+    return feats
+
+
+def parse_timestamp(ts: Any) -> float:
+    """parse timestamp to float seconds: numbers pass through, ISO strings (trailing 'Z' accepted) are converted, anything else -> 0.0"""
+    if ts is None:
+        return 0.0
+    if isinstance(ts, (int, float)):
+        return float(ts)
+    if isinstance(ts, str):
+        try:
+            return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp()
+        except ValueError:  # string is not parseable by fromisoformat -> same 0.0 fallback as missing values
+            return 0.0
+    return 0.0
diff --git a/lib/kafka_client.py b/lib/kafka_client.py
new file mode 100755
index 0000000..d61cd9e
--- /dev/null
+++ b/lib/kafka_client.py
@@ -0,0 +1,54 @@
+from kafka import KafkaConsumer
+import json
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+def get_interactions(
+    topic='user-interactions',
+    bootstrap_servers=None,
+    from_beginning=True,
+    max_records=None,
+    timeout_ms=5000
+):
+    """Consume interaction events from Kafka.
+
+    Args:
+        topic: Kafka topic name
+        bootstrap_servers: Kafka broker address (default built from KAFKA_HOST / KAFKA_PORT env vars)
+        from_beginning: Start from earliest offset if True
+        max_records: Max number of records to fetch (None or 0 = all available)
+        timeout_ms: Passed as consumer_timeout_ms, i.e. iteration stops once no message arrives for this long
+
+    Returns:
+        List of parsed interaction event dicts
+    """
+    if not bootstrap_servers:
+        host = os.getenv('KAFKA_HOST', 'localhost')
+        port = os.getenv('KAFKA_PORT', '9092')
+        bootstrap_servers = f'{host}:{port}'
+
+    consumer = KafkaConsumer(
+        topic,
+        bootstrap_servers=bootstrap_servers,
+        auto_offset_reset='earliest' if from_beginning else 'latest',
+        enable_auto_commit=False,
+        value_deserializer=lambda m: json.loads(m.decode('utf-8')),
+        consumer_timeout_ms=timeout_ms
+    )
+
+    events = []
+    try:
+        for msg in consumer:
+            events.append(msg.value)
+            if max_records and len(events) >= max_records:
+                break
+    finally:
+        consumer.close()  # always release the consumer, even if deserialization raises mid-iteration
+
+    return events
+
+if __name__ == '__main__':
+    interactions = get_interactions(max_records=10)
+    for event in interactions:
+        print(event)
diff --git a/lib/state.py b/lib/state.py
new file mode 100644
index 0000000..cfb4251
--- /dev/null
+++ b/lib/state.py
@@ -0,0 +1,72 @@
+"""Unified state representation utilities for MDP state encoding
+Used by both experiments/ and sim/ components for consistent state handling
+"""
+from typing import Any, Callable
+
+
+def make_state_repr(page: str = None, product_id: str = None, event_name: str = None) -> str:
+    """create canonical state representation string from components
+    format: page|productId|eventName; falsy parts become 'unk' / 'none' / 'unknown' respectively
+    """
+    p = page or 'unk'
+    pid = product_id or 'none'
+    en = event_name or 'unknown'
+    return f"{p}|{pid}|{en}"
+
+
+def event_to_state(evt: Any) -> str:
+    """convert event object/dict to state string via make_state_repr
+    reads page, productId and eventName (falling back to event_type) from dict keys or object attributes
+    """
+    if isinstance(evt, dict):
+        return make_state_repr(
+            page=evt.get('page'),
+            product_id=evt.get('productId'),
+            event_name=evt.get('eventName') or evt.get('event_type')
+        )
+    return make_state_repr(
+        page=getattr(evt, 'page', None),
+        product_id=getattr(evt, 'productId', None),
+        event_name=getattr(evt, 'eventName', None) or getattr(evt, 'event_type', None)
+    )
+
+
+def parse_state(state_str: str) -> dict:
+    """parse state string back to components (placeholders 'unk'/'none'/'unknown' and missing parts -> None)
+    returns: {'page': str|None, 'productId': str|None, 'eventName': str|None}; a '|' inside a component breaks the split
+    """
+    parts = state_str.split('|')
+    return {
+        'page': parts[0] if len(parts) > 0 and parts[0] != 'unk' else None,
+        'productId': parts[1] if len(parts) > 1 and parts[1] != 'none' else None,
+        'eventName': parts[2] if len(parts) > 2 and parts[2] != 'unknown' else None
+    }
+
+
+def get_event_name(evt: Any) -> str:
+    """extract event name from event object/dict: eventName, else event_type, else ''"""
+    if isinstance(evt, dict):
+        return evt.get('eventName') or evt.get('event_type') or ''
+    return getattr(evt, 'eventName', None) or getattr(evt, 'event_type', None) or ''
+
+
+def get_timestamp(evt: Any) -> Any:
+    """extract raw timestamp from event object/dict: ts, else timestamp (a falsy ts such as 0 also falls through)"""
+    if isinstance(evt, dict):
+        return evt.get('ts') or evt.get('timestamp')
+    return getattr(evt, 'ts', None) or getattr(evt, 'timestamp', None)
+
+
+def create_state_fn() -> Callable:
+    """factory for state representation function (returns event_to_state itself)"""
+    return event_to_state
+
+
+def create_event_name_fn() -> Callable:
+    """factory for event name extraction function (returns get_event_name itself)"""
+    return get_event_name
+
+
+def create_timestamp_fn() -> Callable:
+    """factory for timestamp extraction function (returns get_timestamp; raw value, use features.parse_timestamp to convert)"""
+    return get_timestamp

From 0f5f8affab007789dcfad9aea52cf4f2791b41f1 Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 19:12:35 +0100
Subject: [PATCH 34/35] chore: make lib backwards compatible

---
 experiments/ml/arch.py           |  91 +++++-----------
 sim/rl/behavior_loader/models.py |  12 +++
 sim/rl/environment.py            | 175 +++++++++++++++----------------
 3 files changed, 126 insertions(+), 152 deletions(-)

diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py
index 4ceb2e0..1fa4f96 100644
--- a/experiments/ml/arch.py
+++ b/experiments/ml/arch.py
@@ -8,6 +8,20 @@ import pandas as pd
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+import sys
+from pathlib import Path
+
+# add the project root (the directory that contains lib/) to sys.path so `lib` resolves as a package
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
+from lib.features import (
+    transition_histogram as _lib_transition_histogram,
+    temporal_signature as _lib_temporal_signature,
+    state_coverage as _lib_state_coverage,
+    transition_entropy as _lib_transition_entropy,
+    featurize_trajectory as _lib_featurize_trajectory,
+    parse_timestamp
+)
+from lib.state import event_to_state, get_event_name, get_timestamp
 
 TASK = 'classification'
 LABELS = ['human', 'agent']
@@ -101,91 +115,40 @@ def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5)
     return F.cross_entropy(sim, labels)
 
 
-# feature extraction utilities for trajectory -> feature vector
+# feature extraction utilities - delegating to lib.features for unified implementation
+# these wrappers maintain backwards compatibility for existing imports
+
 def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray:
     """Compute normalized histogram of state transitions in trajectory"""
-    if len(events) < 2:
-        return np.zeros(max_states)
-    states = [state_fn(e) for e in events]
-    trans_counts = defaultdict(int)
-    for s, s_next in zip(states, states[1:]):
-        trans_counts[(s, s_next)] += 1
-    total = sum(trans_counts.values())
-    hist = np.array(list(trans_counts.values())[:max_states], dtype=np.float32)
-    hist = np.pad(hist, (0, max(0, max_states - len(hist))))
-    return hist / (total + 1e-10)
+    return _lib_transition_histogram(events, state_fn, max_states)
 
 
 def temporal_signature(events: List, ts_fn) -> np.ndarray:
     """Extract temporal features: mean/std/skew of inter-event times"""
-    if len(events) < 2:
-        return np.zeros(4, dtype=np.float32)
-    times = sorted([ts_fn(e) for e in events])
-    diffs = np.diff(times).astype(np.float32)
-    if len(diffs) == 0:
-        return np.zeros(4, dtype=np.float32)
-    mean_dt, std_dt = np.mean(diffs), np.std(diffs) + 1e-10
-    skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0
-    return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32)
+    return _lib_temporal_signature(events, ts_fn)
 
 
 def state_coverage(events: List, state_fn, mdp_states: set) -> float:
     """Fraction of MDP states visited by trajectory"""
-    if not mdp_states:
-        return 0.0
-    visited = set(state_fn(e) for e in events)
-    return len(visited & mdp_states) / len(mdp_states)
+    return _lib_state_coverage(events, state_fn, mdp_states)
 
 
 def transition_entropy(events: List, state_fn) -> float:
     """Compute entropy of transition distribution (randomness of navigation)"""
-    if len(events) < 2:
-        return 0.0
-    states = [state_fn(e) for e in events]
-    trans_counts = defaultdict(int)
-    for s, s_next in zip(states, states[1:]):
-        trans_counts[(s, s_next)] += 1
-    total = sum(trans_counts.values())
-    probs = [c / total for c in trans_counts.values()]
-    return -sum(p * np.log(p + 1e-10) for p in probs)
+    return _lib_transition_entropy(events, state_fn)
 
 
 def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray:
-    """Convert trajectory to fixed-dim feature vector"""
-    def _state_repr(e):
-        return f"{getattr(e, 'page', None) or 'unk'}|{getattr(e, 'productId', None) or 'none'}|{e.eventName}"
+    """Convert trajectory to fixed-dim feature vector via lib.features. NOTE(review): event-type ratios now divide by n (was n + 1) and the page ratio also matches 'view', so vectors differ from pre-refactor ones"""
+    mdp_states = set(mdp.get('states', [])) if mdp else set()
 
     def _ts_fn(e):
-        ts = getattr(e, 'ts', None)
-        if isinstance(ts, str):
-            from datetime import datetime
-            try:
-                return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp()
-            except:
-                return 0.0
-        return float(ts) if ts else 0.0
+        return parse_timestamp(get_timestamp(e))
 
-    feats = []
-    feats.extend(transition_histogram(events, _state_repr, max_states=40))  # 40 dims
-    feats.extend(temporal_signature(events, _ts_fn))  # 4 dims
-    mdp_states = set(mdp.get('states', [])) if mdp else set()
-    feats.append(state_coverage(events, _state_repr, mdp_states))  # 1 dim
-    feats.append(transition_entropy(events, _state_repr))  # 1 dim
-    feats.append(len(events))  # trajectory length
-    feats.append(len(set(_state_repr(e) for e in events)))  # unique states
+    def _event_name_fn(e):
+        return get_event_name(e)
 
-    # event type distribution (page_view, hover, cart, purchase indicators)
-    event_names = [e.eventName for e in events]
-    feats.append(sum(1 for n in event_names if 'page' in n.lower()) / (len(events) + 1))
-    feats.append(sum(1 for n in event_names if 'hover' in n.lower()) / (len(events) + 1))
-    feats.append(sum(1 for n in event_names if 'cart' in n.lower()) / (len(events) + 1))
-    feats.append(sum(1 for n in event_names if 'purchase' in n.lower() or 'checkout' in n.lower()) / (len(events) + 1))
-
-    # pad/truncate to input_dim
-    feats = np.array(feats[:input_dim], dtype=np.float32)
-    if len(feats) < input_dim:
-        feats = np.pad(feats, (0, input_dim - len(feats)))
-    return feats
+    return _lib_featurize_trajectory(events, event_to_state, _ts_fn, _event_name_fn, mdp_states, input_dim)
 
 
 # gradient boosting classifiers for comparison baselines
diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py
index 4c6bf21..3530724 100644
--- a/sim/rl/behavior_loader/models.py
+++ b/sim/rl/behavior_loader/models.py
@@ -6,6 +6,18 @@ from collections import defaultdict
 from typing import Dict, List, Tuple, Set
 import numpy as np
 import graphviz
+import sys
+from pathlib import Path
+
+# optional lib utilities - models keep their own _state_repr for the evt.value.payload event structure;
+# the project root (parent of lib/) must be on sys.path for the `lib.*` package imports below to resolve
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent))
+try:
+    from lib.state import make_state_repr as lib_make_state_repr
+    from lib.features import transition_histogram as lib_transition_histogram
+except ImportError:
+    lib_make_state_repr = None
+    lib_transition_histogram = None
 
 class BehaviorModel:
     def __init__(self, src_dir: str, loader_cls=Loader):
diff --git a/sim/rl/environment.py b/sim/rl/environment.py
index fd725f8..d9ccbcb 100644
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -1,7 +1,5 @@
-from sys import intern
 import gymnasium as gym
 from gymnasium import spaces
-from matplotlib import interactive
 import numpy as np
 from dataclasses import dataclass
 import pandas as pd
@@ -15,7 +13,7 @@ class BusinessLogicConstraints():
     max_price_adjustment: float = 0.30
     system_max_price: float = 500.0
     system_min_price: float = 1.0
-    product_catelogue_size: int = 100
+    product_catalogue_size: int = 100
     episode_length: int = 200
     sessions_per_step: int = 250
     agent_share: float = 0.25
@@ -37,17 +35,42 @@ class BusinessLogicConstraints():
 def _sigmoid(x: np.ndarray) -> np.ndarray:
     return 1.0 / (1.0 + np.exp(-x))
 
+class BehavioralProfile:
+    """simple markov chain model for generating synthetic interaction events (purchase_probs currently only sets the product count)"""
+    def __init__(self, actor: str, purchase_probs: np.ndarray):
+        self.actor = actor
+        self.purchase_probs = purchase_probs
+        self.states = ['view', 'cart', 'checkout']
+        # transition matrix: view->cart 0.3, view->view 0.6, view->exit 0.1, cart->checkout 0.5, cart->view 0.4, cart->exit 0.1
+        self.trans = {'view': {'view': 0.6, 'cart': 0.3, 'exit': 0.1}, 'cart': {'checkout': 0.5, 'view': 0.4, 'exit': 0.1}, 'checkout': {'exit': 1.0}}
+        if actor == 'agents':  # agents browse more before purchasing
+            self.trans['view'] = {'view': 0.75, 'cart': 0.15, 'exit': 0.1}
+            self.trans['cart'] = {'checkout': 0.3, 'view': 0.6, 'exit': 0.1}
+
+    def sample(self, rng: np.random.Generator) -> Dict[str, Any]:
+        """sample single interaction event: uniform product index plus one transition drawn from 'view'"""
+        product_idx = rng.integers(0, len(self.purchase_probs))
+        state = 'view'  # always start with view
+        # pick next state; NOTE(review): 'view' never transitions to 'checkout', so price_paid stays 0.0 and the returned action is always 'view'
+        trans = self.trans.get(state, {'exit': 1.0})
+        next_state = rng.choice(list(trans.keys()), p=list(trans.values()))
+        price_paid = 0.0 if next_state != 'checkout' else float(rng.uniform(50, 200))
+        return {'action': state, 'product_idx': product_idx, 'actor': 'agent' if self.actor == 'agents' else 'human', 't': 0.0, 'price_paid': price_paid}
+
+
+def _load_behavioral_profile(actor: str, demand_forcing: np.ndarray) -> BehavioralProfile:
+    """returns a behavioral profile for generating synthetic sessions
+    actor: 'humans' or 'agents' (only 'agents' switches to the browse-heavier transition table)
+    demand_forcing: per-product purchase probabilities, stored as purchase_probs (NOTE(review): not yet used to weight interactions)
+    """
+    return BehavioralProfile(actor, demand_forcing)
+
+
 class CommercePlatform:
-    """
-    This is just an extension of the state management for the environment, it does not implement anything dynamic just helps us simulate demand.
-    """
-    def __init__(self,
-                 product_catelogue_size: int,
-                 max_price: float,
-                 min_price: float,
-                 constraints: BusinessLogicConstraints):
-        self.product_catelogue_size = product_catelogue_size
-        self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catelogue_size,))
+    """state management for the environment, simulates demand"""
+    def __init__(self, product_catalogue_size: int, max_price: float, min_price: float, constraints: BusinessLogicConstraints):
+        self.product_catalogue_size = product_catalogue_size
+        self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catalogue_size,))
         self.max_price = max_price
         self.min_price = min_price
         self.constraints = constraints
@@ -55,27 +78,12 @@ class CommercePlatform:
         self._rng = np.random.default_rng(constraints.seed)
         self._last_interaction_df: pd.DataFrame = pd.DataFrame()
 
-
     def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
-        # ground truth purchase propensities
         p = np.clip(prices, self.min_price, self.max_price)
         pn = p / self.max_price
         human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
         agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
-        return {
-            "human_purchase_prob": np.clip(human_prob, 0.0, 0.95),
-            "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95)
-        }
-
-    def _load_behavioral_profile(actor : str, demand_forcing):
-        """
-        This returns a markov chain with average weights which we get from interaction data of our experiments.
-        This defines transition probabilities between different events:
-        search -> view_item_price_binN: 0.7
-        view_item_price_binN -> add_to_cart: 0.2
-        we also must reweight with the demand_forcing vector or purchase probabilities per-product
-        """
-
+        return {"human_purchase_prob": np.clip(human_prob, 0.0, 0.95), "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95)}
 
     def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
         demand = self.setup_true_demand(base_prices)
@@ -162,22 +170,22 @@ class PHANTOMEnv(gym.Env):
         self.constraints = BusinessLogicConstraints()
         self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
                                        high=self.constraints.max_price_adjustment,
-                                       shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
+                                       shape=(self.constraints.product_catalogue_size,), dtype=np.float32)
         self.observation_space = spaces.Dict({
             "elasticity": spaces.Dict({
                 "price": spaces.Box(
-                    low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
-                    high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
+                    low=np.full((self.constraints.product_catalogue_size,), self.constraints.system_min_price, dtype=np.float32),
+                    high=np.full((self.constraints.product_catalogue_size,), self.constraints.system_max_price, dtype=np.float32),
                     dtype=np.float32),
                 "demand": spaces.Box(
-                    low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
-                    high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
+                    low=np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32),
+                    high=np.full((self.constraints.product_catalogue_size,), 1e6, dtype=np.float32),
                     dtype=np.float32),
             })
             # TODO: define more features that we compute from the interaction data
         })
         self.commerce_platform = CommercePlatform(
-            product_catelogue_size=self.constraints.product_catelogue_size,
+            product_catalogue_size=self.constraints.product_catalogue_size,
             max_price=self.constraints.system_max_price,
             min_price=self.constraints.system_min_price,
             constraints=self.constraints)
@@ -192,12 +200,12 @@ class PHANTOMEnv(gym.Env):
             self._rng = np.random.default_rng(seed)
             self.commerce_platform._rng = np.random.default_rng(seed)
         self.t = 0
-        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
+        init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catalogue_size,)).astype(np.float32)
         self._prev_prices = init_prices.copy()
         self.state = {
             "elasticity": {
                 "price": init_prices,
-                "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
+                "demand": np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32),
             }
         }
         return self.state, {}
@@ -210,38 +218,35 @@ class PHANTOMEnv(gym.Env):
                            self.constraints.system_max_price).astype(np.float32)
 
         self.state["elasticity"]["price"] = new_prices
-        # TODO: use the commerce platform to simulate sessions
         interactions_df = self.commerce_platform._simulate_sessions(new_prices)
         result = self.commerce_platform.compute_interaction_features(interactions_df)
-        # TODO: implement COI computation to use in reward
-        COI = 0.0
+        COI = 0.0  # TODO: implement cost-of-information computation
 
         volatility = 0.0 if self._prev_prices is None else \
             float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
         self._prev_prices = new_prices.copy()
 
-        revenue_observed = float(result["revenue_observed"])
-        agent_loss = float(result["agent_loss"])
+        # extract metrics with safe defaults for incomplete simulation
+        revenue_observed = float(result.get("revenue_observed", result.get("mean_sale_price", 0.0)))
+        agent_loss = float(result.get("agent_loss", 0.0))
 
         reward = (revenue_observed
                   - COI
                   - self.constraints.w_agent_loss * agent_loss
                   - self.constraints.w_volatility * volatility
-                  - self.constraints.w_estimation_error
-                  )
+                  - self.constraints.w_estimation_error)
 
         terminated = self.t >= self.constraints.episode_length
         info = {
             "t": self.t,
             "revenue_observed": revenue_observed,
-            "revenue_oracle": float(result["revenue_oracle"]),
+            "revenue_oracle": float(result.get("revenue_oracle", revenue_observed)),
             "agent_loss": agent_loss,
             "ux_volatility": volatility,
-            "mean_internal_error": err_mean,
-            "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
-            "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
-            "true_human_purchases_total": float(np.sum(result["true_human_demand"])),
-            "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
+            "look_to_book": float(result.get("look_to_book", 0.0)),
+            "mean_sale_price": float(result.get("mean_sale_price", 0.0)),
+            "true_human_purchases_total": 0.0,  # TODO: track from simulation
+            "true_agent_purchases_total": 0.0,  # TODO: track from simulation
         }
         return self.state, float(reward), terminated, False, info
 
@@ -250,46 +255,43 @@ if __name__ == "__main__":
     import matplotlib.pyplot as plt
     from collections import defaultdict
 
-    runs = {}
-    for use_defense in (False, True):
-        env = PHANTOMEnv(use_defense=use_defense)
-        obs, _ = env.reset(seed=42)
-        metrics = defaultdict(list)
-        total_reward = 0.0
-        done = False
+    env = PHANTOMEnv(constraints=BusinessLogicConstraints())
+    obs, _ = env.reset(seed=42)
+    metrics = defaultdict(list)
+    total_reward = 0.0
+    done = False
 
-        while not done:
-            action = env.action_space.sample()
-            obs, reward, done, _, info = env.step(action)
-            total_reward += reward
-            p_mean = float(np.mean(obs["elasticity"]["price"]))
-            q_mean = float(np.mean(obs["elasticity"]["demand"]))
-            p_std = float(np.std(obs["elasticity"]["price"]))
+    while not done:
+        action = env.action_space.sample()
+        obs, reward, done, _, info = env.step(action)
+        total_reward += reward
+        p_mean = float(np.mean(obs["elasticity"]["price"]))
+        q_mean = float(np.mean(obs["elasticity"]["demand"]))
+        p_std = float(np.std(obs["elasticity"]["price"]))
 
-            metrics['t'].append(info['t'])
-            metrics['price_mean'].append(p_mean)
-            metrics['price_std'].append(p_std)
-            metrics['demand_mean'].append(q_mean)
-            metrics['revenue_observed'].append(info['revenue_observed'])
-            metrics['revenue_oracle'].append(info['revenue_oracle'])
-            metrics['agent_loss'].append(info['agent_loss'])
-            metrics['ux_volatility'].append(info['ux_volatility'])
-            metrics['look_to_book'].append(info['look_to_book'])
-            metrics['reward'].append(reward)
-            metrics['human_purchases'].append(info['true_human_purchases_total'])
-            metrics['agent_purchases'].append(info['true_agent_purchases_total'])
+        metrics['t'].append(info['t'])
+        metrics['price_mean'].append(p_mean)
+        metrics['price_std'].append(p_std)
+        metrics['demand_mean'].append(q_mean)
+        metrics['revenue_observed'].append(info['revenue_observed'])
+        metrics['revenue_oracle'].append(info['revenue_oracle'])
+        metrics['agent_loss'].append(info['agent_loss'])
+        metrics['ux_volatility'].append(info['ux_volatility'])
+        metrics['look_to_book'].append(info['look_to_book'])
+        metrics['reward'].append(reward)
+        metrics['human_purchases'].append(info['true_human_purchases_total'])
+        metrics['agent_purchases'].append(info['true_agent_purchases_total'])
 
-            if info['t'] % 20 == 0 or done:
-                print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
-                      f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
-                      f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
-                      f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
+        if info['t'] % 20 == 0 or done:
+            print(f"t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} q={q_mean:6.2f} "
+                  f"rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
+                  f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
+                  f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
 
-        runs[use_defense] = metrics
-        print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
+    print(f"total_reward={total_reward:.2f}")
 
     fig, axes = plt.subplots(3, 3, figsize=(15, 12))
-    fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
+    fig.suptitle('PHANTOM Environment Run', fontsize=14, fontweight='bold')
 
     plot_configs = [
         ('price_mean', 'Mean Price', 'Price'),
@@ -305,13 +307,10 @@ if __name__ == "__main__":
 
     for idx, (key, title, ylabel) in enumerate(plot_configs):
         ax = axes[idx // 3, idx % 3]
-        for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
-            m = runs[use_defense]
-            ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
+        ax.plot(metrics['t'], metrics[key], color='blue', alpha=0.7, linewidth=1.5)
         ax.set_xlabel('Step')
         ax.set_ylabel(ylabel)
         ax.set_title(title, fontsize=10, fontweight='bold')
-        ax.legend(loc='best', fontsize=8)
         ax.grid(True, alpha=0.3)
 
     plt.tight_layout()

From 72877439ca8133613f19173eb6b47099d68141dc Mon Sep 17 00:00:00 2001
From: Daniel Rosel <daniel@alves.world>
Date: Wed, 21 Jan 2026 19:12:56 +0100
Subject: [PATCH 35/35] feat: contaminator and training

---
 experiments/procesing/contaminator.py | 87 ++++++++++++++++----------
 sim/rl/train.py                       | 89 ++++++++++++++-------------
 2 files changed, 100 insertions(+), 76 deletions(-)

diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py
index da44c3d..2f23b2b 100644
--- a/experiments/procesing/contaminator.py
+++ b/experiments/procesing/contaminator.py
@@ -1,45 +1,66 @@
 import pandas as pd
 import random
-from sim.rl.behavior_loader import AgentBehaviorModel # TODO: proper import this
+import os
+from pathlib import Path
 
-base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
-agent_dir = f"{base_dir}/agents/collected_data/"
+# import via the package path when available; fall back to a sys.path hack for standalone runs
+try:
+    from sim.rl.behavior_loader.models import AgentBehaviorModel
+except ImportError:
+    import sys
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent / "sim" / "rl" / "behavior_loader"))
+    from models import AgentBehaviorModel
+
+# agent data dir: PHANTOM_AGENT_DATA_DIR env var if set, otherwise a path under the project root
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', PROJECT_ROOT / "experiments" / "agents" / "collected_data"))
 
 
-
-def remap_schema(df : pd.DataFrame, mapping: dict, on: str = "event_type"):
+def remap_schema(df: pd.DataFrame, mapping: dict, on: str = "event_type") -> pd.DataFrame:
+    """remap column values according to mapping dict, preserving unmapped values"""
     df = df.copy()
     df[on] = df[on].map(mapping).fillna(df[on])
     return df
 
 
-def contaminate_dataset(df : pd.DataFrame, on : str = "event_type",
-                        contamination_rate: float = 0.1) -> pd.DataFrame:
-    model = AgentBehaviorModel(agent_dir)
-    target_df_schema = df[on].unique().tolist()
-    mapping = {
-        'view': 'view_page'
-        # TODO: define properly for the given dataset
-    }
-    # think about replacing with freqdist method from library
-    OG_event_distribution = df[on].value_counts(normalize=True).to_dict()
-    # normalize to weights
-    OG_event_distribution = {k: v / sum(OG_event_distribution.values()) for k, v in OG_event_distribution.items()}
-    mapped_df = remap_schema(df, mapping, on=on)
-    N = len(df)
-    N_final = N / (1 - contamination_rate) # TODO: explain this in paper
-    N_contaminate = int(N_final - N)
-    start_event_types = random.choices(list(OG_event_distribution.keys()),
-                                    weights=list(OG_event_distribution.values()), k=N_contaminate)
-    # it makes sense
-    new_trajectories = []
-    for start_event in start_event_types:
-        # sample from og start
-        start = None # TODO: defin start accoding to dataset (randomly sample with weights of event distr)
-        trajectory = model.sample_trajectory(start) # TODO: explain this method in paper
-        new_trajectories.extend(trajectory)
+def contaminate_dataset(df: pd.DataFrame, on: str = "event_type",
+                        contamination_rate: float = 0.1,
+                        agent_data_dir: Path = None) -> pd.DataFrame:
+    """inject synthetic agent trajectories into a dataset
+    contamination_rate: fraction of final dataset that should be agent data (0.1 = 10% agents), in [0, 1)
+    agent_data_dir: directory handed to AgentBehaviorModel, defaults to AGENT_DATA_DIR
+    returns df plus at most N_contaminate rows tagged source='synthetic_agent' (fewer when sampled
+    start events have no matching MDP state); raises ValueError for an out-of-range contamination_rate
+    """
+    if not 0 <= contamination_rate < 1:
+        raise ValueError(f"contamination_rate must be in [0, 1), got {contamination_rate}")
+    # rows to add so that N_contaminate / (N + N_contaminate) == contamination_rate
+    N = len(df)
+    N_contaminate = int(N / (1 - contamination_rate) - N)
+    if N_contaminate <= 0:
+        return df  # nothing to add; also avoids sampling from an empty event distribution
+    data_dir = agent_data_dir or AGENT_DATA_DIR
+    model = AgentBehaviorModel(str(data_dir))
+    model.build_MDP()  # ensure MDP is built before sampling
 
-    # TODO: make sure the new trajctories schema conforms with dataset
-    contaminate_df = pd.DataFrame(new_trajectories)
-    df = pd.concat([df, contaminate_df], ignore_index=True)
+    # sample start events weighted by the original distribution (normalize=True already sums to 1)
+    event_dist = df[on].value_counts(normalize=True).to_dict()
+    start_events = random.choices(list(event_dist.keys()), weights=list(event_dist.values()), k=N_contaminate)
+    mdp_states = model.mdp.get('states', []) if model.mdp else []
+    # candidate start states per event type, computed once instead of once per sample
+    starts_by_event = {ev: [s for s in mdp_states if ev in s] for ev in event_dist}
+
+    new_rows = []
+    for start_event in start_events:
+        remaining = N_contaminate - len(new_rows)
+        if remaining <= 0:
+            break  # quota filled: a trajectory may contribute several rows
+        if not starts_by_event[start_event]:
+            continue  # skip if no matching start state
+        trajectory = model.sample_traj(random.choice(starts_by_event[start_event]), max_len=20)
+        for state in list(trajectory)[:remaining]:  # truncate so the quota is never exceeded
+            new_rows.append({on: state.split('|')[-1], 'source': 'synthetic_agent'})  # page|productId|eventName
+
+    if new_rows:
+        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
     return df
diff --git a/sim/rl/train.py b/sim/rl/train.py
index ba257de..01e6809 100644
--- a/sim/rl/train.py
+++ b/sim/rl/train.py
@@ -3,15 +3,17 @@ import logging
 from pathlib import Path
 from typing import Dict, Type, Optional
 import pickle
-from torch import neg_
 from torch.utils.tensorboard import SummaryWriter
-from environment import PHANTOMEnv, FastTrainingConstraints, BusinessLogicConstraints
-from engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine,
-                   SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine)
+from environment import PHANTOMEnv, BusinessLogicConstraints
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
 logger = logging.getLogger(__name__)
 
+try:
+    from engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine,
+                       SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine)
+except ImportError:
+    BasePricingEngine = None  # engines not required for basic usage
 
 
 """
@@ -26,8 +28,7 @@ CURRENT SOLUTION BELOW does not implement correct learning or updates.
 
 class EngineTrainer:
     """wrapper to run pricing engines through episodes and collect metrics"""
-    def __init__(self, engine: BasePricingEngine, env: PHANTOMEnv,
-                 tb_writer: Optional[SummaryWriter] = None):
+    def __init__(self, engine, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None):
         self.engine = engine
         self.env = env
         self.episode_metrics = []
@@ -35,7 +36,6 @@ class EngineTrainer:
         self.global_step = 0
 
     def train(self, n_episodes: int, seed: int = 42):
-
         obs, _ = self.env.reset(seed=seed)
         prices = None
         for ep in range(n_episodes):
@@ -44,12 +44,21 @@ class EngineTrainer:
             self.engine.update(obs, reward, done, info)
         return self
 
-
-
-
-
-
-        return self.episode_metrics
+    def run_episode(self, seed: int = 42) -> Dict:
+        """run single evaluation episode; returns merged step infos (latest value wins) plus 'total_reward'"""
+        obs, _ = self.env.reset(seed=seed)
+        self.engine.reset()
+        total_reward, prices, done = 0.0, None, False
+        ep_metrics = {'total_reward': 0.0}
+        while not done:
+            # no previous prices on the first step, so start from the observed ones
+            prices = self.engine.compute_prices(prices, obs) if prices is not None else obs["elasticity"]["price"]
+            obs, reward, terminated, truncated, info = self.env.step(prices)
+            done = terminated or truncated  # assumes Gymnasium's 5-tuple order; either flag ends the episode
+            total_reward += reward
+            ep_metrics.update(info)
+        ep_metrics['total_reward'] = total_reward
+        return ep_metrics
 
     def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict:
         """evaluate trained engine"""
@@ -57,17 +66,16 @@ class EngineTrainer:
                                    'agent_loss', 'ux_volatility', 'look_to_book']}
         for ep in range(n_episodes):
             metrics = self.run_episode(seed=seed + ep)
-            for k in results:                results[k].append(metrics[k])
+            for k in results:
+                results[k].append(metrics.get(k, 0.0))
         return {k: (np.mean(v), np.std(v)) for k, v in results.items()}
 
 
-def make_env(fast: bool = True):
-    constraints = FastTrainingConstraints() if fast else BusinessLogicConstraints()
-    return PHANTOMEnv(constraints=constraints)
+def make_env():
+    return PHANTOMEnv(constraints=BusinessLogicConstraints())
 
 
-def train_engine(engine_cls: Type[BasePricingEngine], env: PHANTOMEnv,
-                n_episodes: int, seed: int = 42,
+def train_engine(engine_cls, env: PHANTOMEnv, n_episodes: int, seed: int = 42,
                 tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
     constraints = env.constraints
     engine = engine_cls(constraints=constraints, seed=seed)
@@ -80,15 +88,11 @@ def save_trainer(trainer: EngineTrainer, path: Path):
     """save engine state and metrics"""
     path.parent.mkdir(parents=True, exist_ok=True)
     with open(path, 'wb') as f:
-        pickle.dump({
-            'engine': trainer.engine,
-            'metrics': trainer.episode_metrics
-        }, f)
+        pickle.dump({'engine': trainer.engine, 'metrics': trainer.episode_metrics}, f)
     logger.info(f"Saved trainer to {path}")
 
 
-def load_trainer(path: Path, env: PHANTOMEnv,
-                 tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
+def load_trainer(path: Path, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
     """load saved engine"""
     with open(path, 'rb') as f:
         data = pickle.load(f)
@@ -98,45 +102,44 @@ def load_trainer(path: Path, env: PHANTOMEnv,
 
 
 if __name__ == "__main__":
+    if BasePricingEngine is None:
+        logger.error("Engines not available, cannot run training")
+        exit(1)
+
     base_dir = Path("./runs")
     base_dir.mkdir(exist_ok=True)
 
     engines = {
         "Wild": WildPricingEngine,
         "Static": StaticPricingEngine,
-#        "SimpleDemand": SimpleDemandEngine,
         "RandomWalk": RandomWalkEngine,
         "ThompsonSampling": ThompsonSamplingEngine,
     }
-    defenses = [False, True]
     n_train_episodes = 50
     n_eval_episodes = 10
     seed = 42
-    fast_mode = True
 
-    logger.info(f"Training config: {n_train_episodes} episodes per engine, fast_mode={fast_mode}")
+    logger.info(f"Training config: {n_train_episodes} episodes per engine")
 
     trained_trainers = {}
 
     for engine_name, engine_cls in engines.items():
-        for use_defense in defenses:
-            defense_label = "defense_on" if use_defense else "defense_off"
-            run_name = f"{engine_name}_{defense_label}"
-            log_dir = base_dir / run_name
-            log_dir.mkdir(parents=True, exist_ok=True)
+        run_name = engine_name
+        log_dir = base_dir / run_name
+        log_dir.mkdir(parents=True, exist_ok=True)
 
-            logger.info(f"Training {engine_name} with defense={use_defense}")
-            logger.info(f"Log directory: {log_dir}")
+        logger.info(f"Training {engine_name}")
+        logger.info(f"Log directory: {log_dir}")
 
-            env = make_env(fast=fast_mode)
-            tb_writer = SummaryWriter(log_dir=str(log_dir))
-            trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
-            tb_writer.close()
+        env = make_env()
+        tb_writer = SummaryWriter(log_dir=str(log_dir))
+        trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
+        tb_writer.close()
 
-            save_path = log_dir / "trainer.pkl"
-            save_trainer(trainer, save_path)
+        save_path = log_dir / "trainer.pkl"
+        save_trainer(trainer, save_path)
 
-            trained_trainers[run_name] = (trainer, env)
+        trained_trainers[run_name] = (trainer, env)
 
     logger.info("Starting evaluation")