- To get started, edit the page.tsx file. -
-- Looking for a starting point or more instructions? Head over to{" "} - - Templates - {" "} - or the{" "} - - Learning - {" "} - center. -
-From a9d73ccce50a3c769d103cf417cb5f3d7ca38aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Alves=20R=C3=B6sel?= <60182044+velocitatem@users.noreply.github.com> Date: Tue, 13 Jan 2026 17:07:29 +0100 Subject: [PATCH 01/35] Paper first fillout (#39) * initial environemnt definitions * high level defintion * formlating the reward simply * improved implementation * tailored docker compose image for secondary tenaordboard * preliminary desriptions and babble * details on formulation and defintion of agent and its loop * typos one * more grammar issues * fluidity improvements and refactors * more decluttering and dnoising * finalizing introduction review * some methodology * somehow this disappeared * bit more of this and that * methodology of how we do architectuer and online DP * fix: compilation * expanding on the taxonomy and economic references * authoer notes * acks + google GCP * making space w new format nada lit review * stronger lit review and more sources * forgot about tables and graphs * dedupe citations * adding cloudflare * fixing env vars * updating docs with url * upating embed * fixing the url * paper badge * formaliztaion of rewards and adding definitions * noisy formulations * connecting some more dots here * adding significant weight in prices * fixing error * fixing typos and consistency * extra math formulations and refferenceot DRO * fixing diagram of loops * github mindmap * fixing erro and thiknig about big picture * enhancing the website * goals methodology and gitignore * some more references and theory links * talking about some wtp * feature: added wordcounter * forcing latex builds and fixining the bib # * refactor: update Cost of Information equations and notation for clarity * some more math and refactors * refactor: unify notation and improve clarity in COI equations * refactor: generalize master function for demand estimation and pricing strategies * we dont like math but we have to do it :( * refactor: enhance Cost of Information 
framework with additional context and illustration * refactor: enhance literature review and methodology sections with economic theory insights and system architecture details * alining format to fit the rubric * refactoring bibliography * fix: align * mdp additionally * trying different title * adding balance figure * agentic givergence, finally * fix: figure fonts adjusted to match --- .github/workflows/latex.yml | 48 ++- .gitignore | 6 + Makefile | 19 +- README.md | 84 +++- docker-compose.yml | 15 +- docs/goals/goals.csv | 21 + docs/index.html | 15 +- paper/.latexmkrc | 4 +- paper/concat_code.sh | 8 +- paper/src/auto/main.el | 9 +- paper/src/bib/references.bib | 425 ++++++++++++++++++ paper/src/chapters/01-intro.tex | 47 +- paper/src/chapters/02-literature-review.tex | 39 +- paper/src/chapters/03-methodology.tex | 275 ++++++++++-- paper/src/chapters/05-discussion.tex | 10 + paper/src/chapters/06-conclusion.tex | 2 +- paper/src/chapters/balance_figure.tex | 38 ++ paper/src/chapters/feature_table.tex | 65 +++ paper/src/chapters/loop_figure.tex | 110 +++++ paper/src/chapters/mdp_agent.pdf | Bin 0 -> 10743 bytes paper/src/chapters/mdp_human.pdf | Bin 0 -> 12194 bytes paper/src/main.tex | 45 +- paper/src/preamble.tex | 27 +- sim/rl/environment.py | 451 ++++++++++++++++++++ 24 files changed, 1656 insertions(+), 107 deletions(-) create mode 100644 docs/goals/goals.csv create mode 100644 paper/src/chapters/balance_figure.tex create mode 100644 paper/src/chapters/feature_table.tex create mode 100644 paper/src/chapters/loop_figure.tex create mode 100644 paper/src/chapters/mdp_agent.pdf create mode 100644 paper/src/chapters/mdp_human.pdf create mode 100644 sim/rl/environment.py diff --git a/.github/workflows/latex.yml b/.github/workflows/latex.yml index 2b40879..a8b5c9f 100644 --- a/.github/workflows/latex.yml +++ b/.github/workflows/latex.yml @@ -19,10 +19,56 @@ jobs: with: root_file: main.tex working_directory: paper/src - args: -pdf -interaction=nonstopmode 
-file-line-error -outdir=../build + args: -pdf -f -interaction=nonstopmode -file-line-error -outdir=../build pre_compile: bash ../concat_code.sh - name: Upload PDF uses: actions/upload-artifact@v4 with: name: thesis-pdf path: paper/build/main.pdf + + - name: Get current date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Upload to Cloudflare R2 + env: + AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT }} + DATE: ${{ steps.date.outputs.date }} + BUCKET_NAME: ${{ secrets.R2_BUCKET_NAME }} + run: | + pip install boto3 + python3 << 'EOF' + import boto3 + import os + + s3 = boto3.client('s3', + endpoint_url=os.environ['AWS_ENDPOINT_URL'], + aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'], + aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'] + ) + + date = os.environ['DATE'] + bucket = os.environ['BUCKET_NAME'] + + # upload dated version + dated_filename = f"thesis-{date}.pdf" + s3.upload_file( + 'paper/build/main.pdf', + bucket, + dated_filename, + ExtraArgs={'ContentType': 'application/pdf'} + ) + print(f"Uploaded {dated_filename}") + + # upload latest version + s3.upload_file( + 'paper/build/main.pdf', + bucket, + 'thesis-latest.pdf', + ExtraArgs={'ContentType': 'application/pdf'} + ) + print(f"Uploaded thesis-latest.pdf") + EOF diff --git a/.gitignore b/.gitignore index 733e405..9db7742 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,12 @@ paper/src/bib/auto experiments/airflow/logs/* experiments/airflow/logs/scheduler/ experiments/airflow/logs/dag_processor_manager/ +experiments/collected_data/* + +paper/src/auto/* +lib/ +docs/goals/*.md +PHANTOM.wiki/ tests/e2e/node_modules/** **/auto/*.el *.old diff --git a/Makefile b/Makefile index d2d2d7f..0c51bb3 100644 --- a/Makefile +++ b/Makefile @@ -22,14 +22,15 @@ $(BUILDDIR): pdf.build: $(BUILDDIR) @bash paper/concat_code.sh @cd $(SRCDIR) && \ - $(LATEXMK) -pdf 
-jobname=$(JOBNAME) \ + $(LATEXMK) -pdf -jobname=$(JOBNAME) -f \ -interaction=nonstopmode -file-line-error \ + -r ../.latexmkrc \ -outdir=../$(BUILDDIR) $(TEX) .PHONY: pdf.watch pdf.watch: $(BUILDDIR) @cd $(SRCDIR) && \ - $(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) \ + $(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \ -interaction=nonstopmode -file-line-error \ -r ../.latexmkrc \ -outdir=../$(BUILDDIR) $(TEX) @@ -72,6 +73,18 @@ stats.lines: @find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \ \( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l +.PHONY: wordcount +wordcount: + @echo "Counting words in main text (excluding appendix)..." + @texcount -nosub -total -sum -1 \ + $(SRCDIR)/chapters/01-intro.tex \ + $(SRCDIR)/chapters/02-literature-review.tex \ + $(SRCDIR)/chapters/03-methodology.tex \ + $(SRCDIR)/chapters/04-results.tex \ + $(SRCDIR)/chapters/05-discussion.tex \ + $(SRCDIR)/chapters/06-conclusion.tex + + .PHONY: pdf clean watch run.webapp test count-lines all pdf: pdf.build clean: pdf.clean @@ -79,4 +92,4 @@ watch: pdf.watch run.webapp: web.dev test: test.backend count-lines: stats.lines -all: pdf.build +all: pdf.build \ No newline at end of file diff --git a/README.md b/README.md index 1126458..17a8c45 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,92 @@ ### PHANTOM [](https://github.com/velocitatem/PHANTOM/actions/workflows/latex.yml) +[](https://pub-d5b94a3c29fd40c6b3881946e463fdb7.r2.dev/thesis-latest.pdf) [](https://sites.research.google/trc/faq/) [](https://phantom-hotel.vercel.app) [](https://phantom-airline.vercel.app) - +```mermaid +mindmap + PHANTOM((PHANTOM Project)) + North Star + Study how automated actors change markets + Build an experimentation platform for real-world-like commerce + Two-loop learning system + Online observation loop + Offline "defense gym" loop + Core Economic Questions + Price Discovery + How prices respond to demand signals + How signal quality changes with bots/agents
Demand & Elasticity + Shifts in willingness-to-pay + Short-run vs long-run elasticity + Market Efficiency & Welfare + Consumer surplus vs producer surplus + Deadweight loss from frictions/manipulation + Price Discrimination & Segmentation + Behavioral feature-based segmentation + Fairness vs profitability tradeoffs + Information Asymmetry + Agents amplify search and arbitrage + Sellers infer more about buyers; buyers infer more about sellers + Strategic Interaction + Consumers vs firms vs agents + Feedback loops: policy ↔ behavior ↔ price + Market Power & Competition + Algorithmic pricing as competitive tool + Risks: tacit coordination / "algorithmic collusion" + Externalities + Congestion and attention costs + Spillovers: one segment’s behavior affects others’ prices + System-Level View + Participants + Humans + Agents (automated buyers/actors) + Firms (pricing decision-makers) + Platform (measurement + control layer) + Markets Simulated + Repeated transactions + Limited inventory / capacity constraints (conceptually) + Time dynamics (learning over time) + Interventions + Pricing policies + Experiment assignment / randomized exposure + Agent behavioral policies (task-driven) + Measurement & Causal Inference + What is observed + Actions (search, click, purchase intent) + Context (product attributes, time, exposure) + Outcomes (conversion, revenue, churn proxies) + Identification strategy + A/B tests and randomization + Counterfactual baselines + Robustness checks (offline replay) + Key metrics + Revenue / profit proxies + Conversion & bounce + Price volatility / stability + Welfare proxies (e.g., dispersion, access) + Risk, Governance, and Ethics + Manipulation & Integrity + Bot-driven demand distortion + Measurement contamination + Fairness & Transparency + Differential pricing concerns + Explainability and auditability + Safety Constraints + Guardrails on price moves + Monitoring for runaway feedback loops + Outputs + Insights + When do agents raise/lower prices 
via behavior shifts? + Which market designs are robust to automation? + Defenses + Agent-aware pricing policies (robust control) + Detection + mitigation strategies (feature-level separability) + Platform Value + Reusable testbed for market + AI-agent research +``` diff --git a/docker-compose.yml b/docker-compose.yml index f572758..f72f415 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,17 @@ services: - - tensorboard: + tensorboard-rl: image: tensorflow/tensorflow:latest - container_name: "PHANTOM-tensorboard" + container_name: "PHANTOM-tensorboard-rl" + ports: + - "6007:6006" + volumes: + - ./sim/rl/runs:/logs + command: tensorboard --logdir=/logs --host=0.0.0.0 --port=6006 + restart: unless-stopped + + tensorboard-ml: + image: tensorflow/tensorflow:latest + container_name: "PHANTOM-tensorboard-ml" ports: - "6006:6006" volumes: diff --git a/docs/goals/goals.csv b/docs/goals/goals.csv new file mode 100644 index 0000000..b999fc4 --- /dev/null +++ b/docs/goals/goals.csv @@ -0,0 +1,21 @@ +store_mode,task_name,task_description,definition_of_done +airline,The Indecisive Executive (SEA-LAX),"You are traveling SEA to LAX for business. You prefer Business Class for the comfort, but you need to justify the expense to your company. 1) Find the Business Class option and check its price. 2) Compare it against the Economy option on the same route to see how much money you are saving or spending. 3) Spend some time weighing the pros and cons of the ""Flexible"" fare rule vs the standard one. 4) Ultimately, decide that your comfort is worth it and book the Business Class ticket.","Booking for SEA-LAX Business Class is completed." +airline,The Cross-Country Splurge (LAX-JFK),"You are flying LAX to JFK and want to treat yourself to First Class, but only if it's the right flight. 1) Find the First Class option. 2) Thoroughly check the details (duration, arrival time).
3) Compare it with the Business Class option if available, or just look at other departure times to ensure this is the best schedule. 4) After confirming this is the absolute best option, proceed to book First Class.","Booking for LAX-JFK First Class is completed." +airline,The Budget Student (DFW-ORD),"You are a broke student flying DFW to ORD. You have a budget of roughly $200. 1) Find the cheapest Economy flight. 2) Before booking, frantically check if there are any other flights or if the ""Premium"" economy is somehow cheaper (it won't be, but you should check). 3) Hesitate for a moment to consider if you should just drive instead. 4) Resign yourself to the flight and book the Economy ticket.","Booking for DFW-ORD Economy Class is completed." +airline,The Quick Hop Commuter (LAX-SFO),"You need to get from LAX to SFO as fast as possible. Price is secondary to speed. 1) Search for flights and identify the one with the shortest duration (1h 30m). 2) Click into the details to verify the arrival time fits your schedule. 3) Briefly explore if there's a Business Class upgrade available for this short flight. 4) Decide to stick with Economy since it's such a short trip and book it.","Booking for LAX-SFO is completed." +airline,The Status Chaser (SFO-SEA),"You are trying to earn airline points and need a ""Premium"" class ticket specifically. 1) Search SFO to SEA. 2) Filter or look for the Premium Economy option. 3) Compare the price gap between Premium and Standard Economy. 4) Browse the details to see if the ""Premium"" fare includes better baggage allowance. 5) Conclude it's worth the points and book the Premium seat.","Booking for SFO-SEA Premium Economy is completed." +airline,The Family Reunion (MIA-ATL),"You are booking for a family of 4 (2 adults, 2 children) flying MIA to ATL. 1) Search for 4 passengers. 2) You prefer Premium, but if the total is too high, you might settle for Economy. 3) Add Premium to your cart, look at the total, and hesitate.
4) Go back and check the Economy price for 4 people. 5) Decide to treat your family and go back to book the Premium option.","Booking for MIA-ATL (Premium) is completed." +airline,The Red Eye Skeptic (LAX-JFK),"You need to fly LAX to JFK but hate late arrivals. 1) Search for the flight and check the arrival time of the First Class option. 2) It arrives early morning (02:15), which worries you. 3) Spend some time looking for other flight options on different days to see if there's a better schedule. 4) Realize this is the only direct option that works and proceed to book it despite the time.","Booking for LAX-JFK is completed." +airline,The Refundable Requirement (ATL-DFW),"Your meeting in Dallas might get cancelled, so you strictly need a ""Refundable"" ticket. 1) Search ATL to DFW. 2) Find the First Class option and verify it lists ""Refundable"". 3) Check the Economy option to see if it is also refundable (it might not be). 4) Weigh the cost difference. 5) Choose the First Class Refundable option for peace of mind.","Booking for ATL-DFW First Class is completed." +airline,The Hub Connector (ORD-MIA),"You are flying ORD to MIA to catch a cruise. You cannot be late. 1) Search for the flight. 2) Verify the ""stops"" is 0 (Direct). 3) Click into details to check the duration. 4) Worry that 3h 30m might be too long in Economy. 5) Look for a Business class option. 6) Decide to save money for the cruise and book Economy.","Booking for ORD-MIA Economy is completed." +airline,The West Coast Hopper (SEA-LAX Business),"You fly this route often and usually pay around $700. 1) Search SEA to LAX. 2) Find the Business Class ticket. 3) Check if the price is near your usual $720 or if it's surged. 4) If it looks expensive, browse other dates to compare. 5) Return to your original desired date and book the Business Class seat.","Booking for SEA-LAX Business is completed." +hotel,The Honeymoon Suite (Presidential),"It is your honeymoon. 
You want the best room available, specifically one with a ""jacuzzi"". 1) Search for a room for 2 people. 2) Identify the ""Presidential Suite"". 3) Click details to confirm the amenities include a jacuzzi. 4) Browse the ""Executive Suite"" just to see what you are upgrading from. 5) Go back to the Presidential Suite, confirm it's the one you want, and book it.","Booking for the Presidential Suite is completed." +hotel,The Digital Nomad (Executive),"You are working remotely and strictly need a ""workspace"". 1) Search for a room. 2) Check the ""Executive Suite"" details for a workspace. 3) Check the ""Deluxe Room"" to see if it also has a workspace and is cheaper. 4) Compare the images (if available) or amenity lists of both. 5) Decide the Executive Suite looks more comfortable for a week of work and book it.","Booking for the Executive Suite is completed." +hotel,The Safety First (Superior),"You are traveling with valuables and need a ""safe"" in the room. 1) Search for a room. 2) Look at the ""Standard Room"" amenities. Does it have a safe? 3) Look at the ""Superior Room"". Verify it has a safe. 4) Compare the price difference. Is safety worth the extra cost? 5) Decide it is, and book the Superior Room.","Booking for the Superior Room is completed." +hotel,The Bachelor Party (Max Occupancy),"You are booking for 4 guys. You want everyone in one room if possible. 1) Search for 4 adults. 2) Find the room that fits 4 people (Presidential). 3) It looks expensive. Go back and search for 2 adults to see the price of a ""Standard Room"". 4) Calculate if booking two Standard Rooms is cheaper than one Presidential. 5) Decide it's too much hassle to manage two bookings and book the Presidential Suite.","Booking for the Presidential Suite is completed." +hotel,The Budget Refundable (Junior),"You want a cheap room but your dates might change, so it MUST be refundable. 1) Search for a room. 2) Sort by price or find the cheapest options. 
3) Check the ""Standard"" and ""Superior"" rooms. Notice they are likely Non-Refundable. 4) Find the ""Junior Suite"" which is Refundable. 5) Grumble about the price difference but book the Junior Suite because you need the flexibility.","Booking for the Junior Suite is completed." +hotel,The View Hunter (Executive),"You want a room with a ""city_view"" or balcony. 1) Search for a room. 2) Check the amenities of the ""Deluxe Room"". 3) Check the amenities of the ""Executive Suite"". 4) Compare the prices. 5) Decide to treat yourself to the Executive Suite for the better view/balcony and book it.","Booking for the Executive Suite is completed." +hotel,The Just-A-Bed (Standard),"You just need a place to crash. Lowest price wins. 1) Search for a room. 2) Identify the absolute cheapest option (Standard Room). 3) Click details just to make sure it has ""wifi"". 4) Briefly glance at the ""Superior Room"" to see if the upgrade is <$10. 5) If not, go back and book the Standard Room immediately.","Booking for the Standard Room is completed." +hotel,The Family Vacation (Deluxe),"You are traveling with a child. You need a room that isn't too cramped but not a suite. 1) Search for 2 adults, 1 child. 2) Look at the ""Deluxe Room"". 3) Check the amenities for ""coffee_maker"" (parents need coffee). 4) Compare it with the ""Junior Suite"". 5) Decide the Deluxe Room is sufficient value and book it.","Booking for the Deluxe Room is completed." +hotel,The Long Stay (Junior),"You are staying for 7 nights. You want something nicer than a standard room but affordable. 1) Search for a room. 2) Look at the ""Junior Suite"". 3) Check the amenities for a ""mini_fridge"" or similar. 4) Compare the total cost for 7 nights against your budget. 5) Hesitate and look at the ""Standard Room"" price. 6) Decide the extra space of the Junior Suite is worth it for a long stay and book it.","Booking for the Junior Suite is completed." 
+hotel,The Last Minute Panic (Superior),"It's late and you need a room for tonight. 1) Search for a room for 1 person. 2) You recognize the ""Superior Room"" brand. 3) Click it. 4) Quickly verify check-in times or details. 5) Don't overthink it—book the Superior Room as fast as possible.","Booking for the Superior Room is completed." diff --git a/docs/index.html b/docs/index.html index f190154..a3f587b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -47,7 +47,7 @@ - + @@ -233,14 +233,13 @@
7F*Tas=ZBMDv&+27w?rV`xOkXBv z23QiWRmDwdt9qo4{3{%$l;dJ7a}suUPDHyV$# zisOxI7IVa8V^6r{ z6WRi8o y0S<$=;`qUz!MQV>&e{)hPNgietfM@CjB% z2{3X*MH>`tAx!~w2P;*fsao~6E`+f?9P!NM_etX~{DKTUkH5q+W)7yJN(s7+K_SL| zTGCIsuZvk+i?Y95H-)b?gJ)0lF=FhOf`PE4Nx8h*UJvXiuOC14Tfo$4^G|Qc5gdNA zYfXApGSa@Qg*9VSyyd8yQ*bl2e@+Q|(>g})z+#WLu#>cZ5rwdRvM0d9^9}sV8E&1_ zrIpvCb;VpBMC@PJoNO=Nj3k2I5Ecl#K}ZfjrebbBD20KR z?Cr72#r)L8lFlfP)3LyQ!y**d>NPcN{5(X&)5o*f<$IE@`^i*X1fuqQ @Z$o12L$l-0d>iX~@N`S)i&-9M`}-X Blst-q%QHauy`Zu=s;yb;tq;C|r7drXrw*&)_ zx8B|*eHCZ2nxMH0q8N9Y-H#L5*6*g4%33db8&Vi#? g>9YFsMFt;F0;$v zQ=(NGb5^9)OGVQ5@-Do&Fupxi68j Y@09OKc7+_wV--9Cu(*3SM5Pwl}aae8c=ak>@y>hW}aVB +(@6vx3reIV9hy5TbW&!V9Bye+g?RcXNzAUYx6x1%X1|&3YMYmJnOBaK{qQAS z68DmbEK3YN3R)tE8sZ{5N&PZ~EroSozch)Fk9+c-cwm#OKZo#)ZYIu~)v3H+5Z)U% z`E 0ldZW%7_U?AwWwq9QUu-Y)Sj_BG;Y!nWr2kF6iud7~&JD8s{FU)o46V=Q3& zh%KjB_`HN~iBfG`6*&KW-tTATW%%XCLbfKeYs68+BgAFI$B>*i{oB0T&D$Q^_uFwJ zRNNnX#ZXccfyZZN#Pm#~FJLmQR|L3ZvqQ++3%NXJBB?Ucy-d$8BU)WNE|Wza?rK_JQ}%@xyBP^42&R@+&qSySob%DOSiW_g%w?K|I4tqR^S zwD^6*LBTkpWsny~CHw#z&VWY36m{6r4x!eDP}yK~*c8_@9j%u})jG5kc@=n7ZaK+) zi`I2n9qa#2bY@8Po~O}a-~JNo8|&w*n`IjhR@}PNb=^k>RSD+wMX%Fek!lu~2Oh@T zdHFK3ZO&Svmj2rV$DhS84{W0;?FgbkuXbE-Fc$}PHuw;Y;a>R~`Z5MFSue9Kwa1Y| zC?nRL*W2ns*Snp19xr>_O67LWsv=svH~R$YgEy^iE?eF5NhwGj*B657V{!H+ALD*K z?vn)SA}YP&KUw9&eT0$}gD3CHMI~EVstXRLr;;Sa&cc60zvgFXf21OziBqYwPutL; zBhmXwswl1+TfFgdnxBeb_<`Md{N3!?YvwIa0WLGPk@aw)vyd3xJHi?2uJTb^Pn4Rd z=zy(jS?6WC(azfit`0%Y>49PH$msXDgJ;K{YZglv40v)Q(d%Mq&2pNyOEPwFgCg1< z_?+T=S) bjxj2E3Q7e-d?FfqeL|O+8YOWyDROK#d$U1(@!}FhdoHMI{E0jH za}dg|%s|yIfrPxMN@Y&W1?%hTWX9C27t%Xzwge}?U~Xcs>TT@j-2I7rw{b_ENoM%p z_hdJ*zT3sTvkScY7(oa{juAUYVDuR{Y|we$5;N2@9w9H0QJWLXVdg(qCxmzTlMH;0 z#0TfMRTXg3!;hIInCmx~syUVEQABoqX_&dN<28d!BUI)W?=;H}%WbpFP`aWtYe-}% zfnXF$ 1|_8$SI(&x2%c{T8b-2nEXOW8%?P#hTHjg>NpuWpysr_q2 E zSf-Y)im-YCa|tir8{xqTu_zc 4h@t7VK2G6E#7l@u$1gDslYmN9j {b z{5u~tgVY?Z3sCbR9CIfCZkIP)#Qtx%nkHWIkFSCe?yl=1?q^|ot&X#C?>w-Y;^BJ@ zj@QO%F0)X@i&lb)Q7=1XI4V9Ei!-erL!1SDNHzrLYgnf9I 
yX{NC)u$8A2c?nN+~tPk}vvyaWtHM&6_ZYnj5Z!QnZE1PK!e&&3pJaJ1; zm?P#s*v%118Q`bwtkiKIJQbU^!*=wV8E|aWtL-`J_)!8HYdWK|nAYEpl&U$SJ6PyC znoaL_RPI2HSdO%_8uho*(y`RSx7=@9%w627ZO~)pbG65EFMW}I(*C{pSclfKu+Sg{ zVyanKmS^N#INqKfDwqwEzOZ8?S6DWVh*DhE4z~|huVw3-Ui7cF2rR1>RA+`&U6(ET zu9twiJg&cq#I04(iNyVF?0PfexT^x=QiPX|)+hIZtwz}=H|5%uVA8^!h_B+1lCFSv zFmXq|u`YHhJ>ZAz-P&iFnmP<@3wK#c<@KDs6nPc6u% &&fhR;AkNA`oF=QUiHud+WzgJd;WyE^HC)Jf4*V-_t3!^XvGP>fCy`@h ztpcb)&ghtzUy$M!fgy2oIY#e11i-l$$8913yk9!HZi_9mpsy@28kdni!+tK0s=P)l zmhafEh{XeXrUpf1psx1}Sf(QMnBA%4^ ~u4Z3kSre3*l{!~lMQ#imwd~DiO=u5C&KkECcb|%-)7Y9tnq2Osk%kgn2(~k(J zjLJu6-HJ=STz^Vz#xLA{LoB*b08?A$+ ~X@H&XI1xacqZ0|0t1_xRt{ zcIYS$ns)-xoAzlTGHC5+6n02G@)Kcb9`dy$deTLc@`FN-s1LV?=E6w4v)uT@be$y? z_O}Z+wh41?sj>(~knGymiNEks`k)xHaYU(qnoiGC-w#hB#AMdbCt~GIH)>2)5Ahw_ zX{=K^$vG0Q2ue^_)|!$=K9D)-Zbudz;&-6Qtf(c2<;7qV*0}9 JfcQcIs@;y7CrS5U*xb%_+6yaDOc z1xJNP1s8>~Vy$IM LTinh-fI7-b~|yvX62> zS8pQ8v(LKAx;-8*UMUurw|0lMsWwErhGn8k&1{%fX7Ej-lUc!xl4?uNAE5{4!o(xt zJiL6Y%_c5){5wJY(WJIvmqT7q{G|i|A@9Pn<|ml?iK|zP8fD`D%`W~`i1UP2F|&gH z-o(G4z-LtN4;bzl!xI798`_zDgxJ{qfwVo5-s@XEVRV9G$_lEQv_fW(KM+M#u${gV z*v`zDUeU%%-}-NEF?}mDOBcYqzZn6~oPgh~)PHbU>YLaDn3?_-5fZX-25153K^)8g zdM0L80Emf$9l*)N0nqu)BMH&BG&2;mHn9W)fWIk%_J-gmyzz I+0{RHlTnE<~LK*fI`mCyL)v;Y6QiGFKPaxj4W z#x9lZ9Kg@!`%M+nw+H`5LjQN>dWJNGP4(^mGKP%)pKTC`^Y?iFz9Rt8zY3^Ct)FV9 z%&biSjA~}qg4XtCe-VDmshJr;Ozod+3j815%pi6K4t5Zb^VvbznAsSZn1L*8oc~Xc z|0ei9Df|EFV+B4-v2wC8fH*-cKsEpy3y=ZC%*GDl1hBFJ8Gx)DtWT5wCzbQ5{*052 zgZ A`1B=5f)xie zK2yqt6XVU(k$P1huVgj1ZfV{Sx6HvV8GS#=zOi*kcI&D@))KSWU*c+RcVtbi!biXi zAi3iSl7R@~h_fCIuXJ` 4rwY%bk?|8aRF#1qPc+WPvx38#ORFFb`)CU%a$!|7i&1^ WX0F--iFQ znBdRg^M5KNP%?7`1DO5@P-*}z5P%85^hf+tw6TEzKz|ogFiKh*+W d#(}he0ii(R00p2u6N*d~3h}}e-GV<_zUd{|2WZ62 zU4Q{m7p53``_R`D 5I|G9K~h`t@f+3smoARr4TGZF=bsGJzme*x3E BqGtdA literal 0 HcmV?d00001 diff --git a/paper/src/chapters/mdp_human.pdf b/paper/src/chapters/mdp_human.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7cef37a21dc48a0a6c66fa87a788419045bb8fcd GIT binary 
patch literal 12194 zcmb_?1yEeu(ryw6?iQTE9cBiH;O+!>*8qdNyE_RIToXJ%kl^mF!GpU7w+A_Limw zfB&m`GgEu-)vK5F>ebV`zK>EtM2r!{#DPHh?V#)yffWD**ciS?c>5N>EMo#Ob2JCA zJ&6<%00029n1z+2iT%^v%D~Y?#Kg$P*aU%(55d9F-o(Hf!8I*iV<4KM7QJysZSw0a zkdiN6@|3O>2xx_!_rQcUSj0c*&-l9W!TP?LJU-@Q!FN*+oM5)01+oYkF)=aI>#3hU zsKk$B@iPNwr$=2W2h2X&E$&Cts4WcM_t-Ty$B!+{%igu$hf4VwmP&mzy~awRHH3#h zbyo7pOaF2!7FEq$RaMe8ve%L{IOl_`nnW{OqG*jZ%0$$0;qmwMzabD|&{fK;grWX~ z=vMZl2a%Y#JUu2d>H=FAa##8AjpTMB bhq( zkFO #)2|n^CtmO-k>jt-(;Bu{B)k!72l#UKjjLOD z!x(`w2fPa6w8S-9vp{wlA74iP{S9m!LS*-Z4}mUnQOk-M@WocxEP?NalSvyMAsMx+ z>)RJOiUuc;j^JrDtV7yO7|}`CJ}uuh3nuX^L`7dYZM9Gls3cimJZ6R$%epUX0T#uI zLpiI+zPD~JuVO1;QsEo65dw}2_R8R}9>RHiC{A$d6?Mc{Dkz|JOATbp@Wm;Ed{(&n zLh(|Bfd+xnhycz^+ofZmD(>1z^uSd7o$AVDb0=NfPt{$p?s!i!$Qi`ou UAE4j>^7#%F8A!y EPo%1i_U=tgf~`K(|+A zgH`n91T*W){NjR&v3o`n@hV@TT>!durO%JC{G;Du`Q<49UrQ^SR`2@JaK___qUaBV zRG!N1ptj2sj%W0wn~d7L5F;nVCn_siAz9JSappfjRM0|0){q>t*^*URz*1-x=9rX| zmir{_t@4sOKk%TVN1r1|yjZ(O01FkT11u{kom<{UgoVs&JHgirK@H&wZ^Y~FQ* $JdA=|X zxH7?`7z&P~;R`x4C?FPCS8|n @Y%Ipzx^v%EHKZ%rx}2%_ p_=aI*uI+&Y6#=xGo+X Op=oGCWkgP!p<71F07UP!aAP9uNj1)2w%8q$S z+`lOjwj^6Wwx+Eg*+c(i9#`3ScfTlG3xlNY5b7X+RjW5fZ(SCj&*$w+aepV6I`wGE z8?E&*8A;RB`ke{49bu1^wL>Z?<_@SzNc5o0Yfp0wq7-p*Px-dK{!c%5%f9k=AIJN` z5*hmNy}A}jA%1c50*KS^T)^K;w0gjvjMymjvh&RwXqcLuGL`m~0rM#nLDlFFI5wKm z=1U_Dm_x7oN`gBr^Vd)qu =7d)}RK zJ8T7F#?kGs(it&g(WMFMX2I5hBSnVcmA=sKg8qXywO*gmihi-0y=x;^YUfEKK`*BW zrY_m9S*PjC`!e<%>*nD}Po?;@Ks(lNPABos?L;=>WB`)T#Y$^mO`y+Fyq6RRgeO$| zhZKwI^w(^a1y8f7pq0d3+9kap19h4*2Lh-SII`L2tXa;^6Nio=dC>f-MtFm=%B?nL z;x8(9!T~hrNb<8z7XTe+cL`@CDEh%7QD!hgnlI>hvJZuD13jK5tb}63wh%iAjut=+ z_kms5c(4^T8;Z70>KE|gS7 45BX*+FwO|4iIirm$Hl89cKdEiS-4g=%}mU z)P=p~j=*@)<8BF`F#^{ZiMdMJNTc4&-@A=AXO%`#!9Xv+z+3DLdF^|u=`I#RA(2Nk z^>NH);?RjiB*_d%e77`6Ww-d%VtegjTF+b{5>28ZP7~UX55(tf?=d8!hB~=LoNCB! z(58@q<>+@DnE*7jkK=2)bUaeCl*JkDfqYk4Q&)T!QOY? 
0PEUq-1;=H-)#( z6U~67pS;#V!}JdkcN;d_*;ZbnBxjSti~dMLtRK8R^JqqY+Em(-D4|nM6A!Q=E8 jcEUe&?Gl=9;G>{;14T_krGC{T7 4tp0p1eA9@caG8h-ul=Wo<6UV=pu5KpZYD7g#Y>|UuA4?K;>n(D$g|Od4+YC zR;B7KWNzMXU@0(hWaGe<=UAhI@>UVOs#fH6kQ&RKiVXgi$Wmalp`sr@>}osPfrv9> z>nF4WeeW#b$D!*n`g&8k8s1dZ{-tm1%@I1XeyAyJZjwPn UMSPt zkY(EBpUPT6zCkkdZqnlDZ%2|VmA`$uyio2MfpQ7Y=hXswasM=nG#>8imM$0Z1#yyL z=GVo*sY~=v1idSq5kAygueLxc<|F-@OGV%Q k8YvqgHIsB4DpBl0O?!xyZ z4#}lC>cZ@mU`Ae@gH12LrMwbfU74Fy!~?at1_kW~3n6jFQ^}WQ)mXl8)WK+#y~e}( z925<}$yDR=aZTb^vlSctN+2P@4+6k$F3nGz*M{#8T0{0DD-Ff@PNy| HJO Lf%7&`8D~wfqtrSL<9@RsIqYyJfrHWj7;`6jX~-yIlwF|QH*}|u01E~JvI5ME zFR6MXH1yEAK#G2G1$uUR%|u;cbLo0Rq^?D$4_RF++@qv`(k$gyiF#S3U5g%3Mo3p0 z^wZ2U;()iWSK#h8wPw@K7+t0=Kx@9Qf7&k*6TAq-scEYTKDEr?kcG#A)-v`6wMn5_ zM7?%7TPow!_$p4FIVSVq@^E{0>cf^3;*4MdG5#~>eR_J%mY-AP=lfHV4Cdfsc~*EX zvizM)|4yGpT^+@h9G_BWz}vUaf~OC&0r05+U=|hxfB?)!22a FM|9V4Qys24Gfpvo!%Q%NxE|a4ChOo|Fpt$?|)hW#QxW`K%jrj>(6O`z< Hc^r9oP3fpyE){WaWjx+(rO^w)lVbc3@r8cFtXFrMdx^?S z+Sw9}X8N$|Wa*FW#@{U3o27>?L_n{~r17v((6R@hs7E@Qt2frL77e`HPjY?0AceP4 z`N+(0YaDP3j$ZC;M*lENDi7k7mC%RNMJ>~AHof<=Vdyxy@orqIw(439GT9^UGJ{>} z+LIi3Q>NF)*Be7hL9^y$yK&jCR#v*v(l((jR3{EEOjk`+ 2nOZAU6O~l{ zGgla?!>c&PY9@o0(X7CWyhAyN#BzQogA+TIA9;2OmSv|6wE@eP{E4w?Z!PyR@|bzy z!htOg77{tvV!GxfhhFdEvLg=y0^#dne{H9Vw0bg!``8Pa8i)jswb@*Dz`+Xl6^ctp zVaNQTrM&s2mb3-!ki6{AMJ9P&$H3KIhmcciRyImrpR3SDg@ ~M%FJzMrd%9GSY2sTjZBL@AbrK1 z6Eo3A=lV+GTw>Ag>Vg3xypT6B-R~S_&m7N}($W%m-4x}9*B{+ZFG#esEGTQ~8Y+N1 zX5)GMs9%cwe@tCPg1lx(2TRFx6~9oqQIcs9XqO1=x)%JvK1(r%jSAJqm57#aW~-r} zb34}&NsL5X!aj)oh61h3gkUlibc&8mvQ6IH>?}+(HI%~HW^IqSQ{YHSZD1rMEF|r( zT$Gpdl+?%atOIepjdcl?GYNTb5Yo$uNcf!QC`=10%xnfOe+sdsXQ!7o>wG_dIAWM> zFu9C5TrnoT35lSHCu2)NUvLuP>||5-L|%s-*7-OK5OlniBs-sHfhFb3*|tBzS9Ede z!@mhSd^xNipIQxqFN>xBh2YTTMHx47LTtZGj@d^dcbe0gDC?A0b>`mk*1hfm)~~yF zrhIvnuP|feAsVc9-89dBn1Dy7eQR!X|3x*m@O=E|+_#A>U9SUd!xboNh&$HC5GB7N zHOzhZhm8;nAZ6btQthO%G@R4=!L%<-1kU;HBJo)gYYen;S-$2g)UG|0fs5!S?XeQ3 zz_*<789Yc|{hS2`U6yut6TfCBk;`>QA4EHQ haX?&gm1!+y&l6JGs`J`cgW-% 
zj5muH%ot6i(idkx$hAbcjJ*?n9ex>J&P(@Ev7>k~nlpJ7a$2qf7+mO9W@mgXC-yj* zwTt4y(E9qbk34nM$2gk&^b{qD*PBpkdRqJX{9r&uMP?3>XE=TA&K1Yv2R?s5y|iAb zW_|W@vr3(=smO+c3Mn#9vmi$?>fNCbderVjdNV!bT|LjGSz91BJkpJ;W2djM#}V$x z0hx@+njchD6fDZ&qtvZG)k2*hW;Ju+!`G^_?x^8mekQR93)Z50#fj=mB#)gV`S#0O z$>v?^kVa&EUVXReF~Fg}dJf~jDm4>-W=g1#w%6tR7ThN8O8b!#+&qsRo&D{%Mp=a< zs#c0uE~VeG9?JWd>B^mtyl;L4z^1;xxY`NCG*gJ><7S{%YpA!JNYe8>nOZxCy7Iex zM@-_TDvK@>l-sGh1xPbXQYmp@{Y2^4M`qti8_0;R1rZ_ys5Pvkq~A8=&TS0zh9wb6 z+}H2Yr1UDA8pmp+BrH~nU-UN 4%$tl(v8UJ<3u{+XIczzQ zKNaQ-4KRAvJA8pSXmiKaema`F`v^I!{yag-BC&A8rrs6x5t#Es#kR(WH`GWuVjOeg8>vQ&n2p4_!TjvqYZ*F;c17$%z4h7pF_K zKJKyH5PJKK_B7>|!YK 8XlV?W)1zU1^MGwIV}Mn%!8_r? zx8)KssyxSH;HPT2a;ewk5r-AWCZ?ou5dKMnjOy+pgCJ2Y-IxWGCiOvlm{A&-rhT4V zYL+`S>gpO82CVP9$08_HOV?_M)9^TN%z43IZ|3ld0IKw5-5z>huvFjdD&0m`Q|n22 z!nSq*x}N&9`(1K(4a4X8>J16(Le<5L+WoJ3cimayr)Mw$SZfb4*HpKZyKw>>MU+c8 z?~beMcysh^;tu?2W;I3LDQHJd@ald%xHw^N=vm9y_=?yH;7$I)?fqed5=zMo;*8Pn zx2?L56XxP7BL_>0UQRnG3YcNu(@-v4SUO~|bw|80l2UuKyNDe_W2b}|{N+TEA2;c$ zF!v&o7u~x2VVWY;d5qkZ*MIr&G7k~&0%?TC!>_QFXNXVUwYOPB-B54!%dcs;`M0tc zRsJ#9vhjU`8l3bt >@Su-Emf6 +o@(k`5=ccJ6anO~geMGlj_pt{CBNpl=t z*i4BqAI!QB&gbsQ51*`HU?Zh7TR7zPPG!nJ2-z+)`-U>-x_wa^Fqhxd<1}%Rx^p%) zHP@4TqdJyU``*WL)``M?N!$w8&9O9f)#(_gIN}GxO^K89I77F3HL|HLI` OGz5p~+D_T;yXI$-{dea_blo@G<#(KfW2TaYGuo~a1^JYz z$tY1#qH2>9pkx$n8-EyPnAAWEmxMgT68^+*&mj38a|KEbF-j4vqD-W4ty+*=I}n=X zbpj@sP%D9>Zt__&x_O+=S6`}{-P?SJwKZn}n%|LK_zOGje+Y#7sn3Ze2VXdxwE1aE zYg20rb`y3hWV0|a4+3um4}rIq{1HEgWsblel&YAj7y}y(8xs;;NGLC>7y==5?|16= z>UZe(=y##}nX{L3o3oX3nR8_5kIcvq=4VdX#Df<@87jCIV>tc}>+j)9HQ{QmuG4k6 zbaJJHh+@6e{F9@6n76hre6hw^cIdA37RBn}G@X*jsJT=_?dEvWt)r4`xqQ~;ZT$6A zJX@8Syf=`A-t00Qia)H=s~PX>S9C7!gh9oSaLJ9ACQ(%jTErD*C8QfW8x@np1o$#I z35qNf@%?AsORMpr33&MB55TLQdOBj}u^?uby%Yn%GH?!mXi6-lwiOK}?|$!@>uPGg zp{X}@2sppF{$ehd#LFS4{($H}`e6QBi;q>l m5x>uvrCtW4)L$J%R)5NBpLxzV zFPzs|6R12l{dKPn-@8da*53&&mCv!~M-)^(uckF)lbt_GzOa_LaFl9 t^om z?ozvVv#BY$UWABBx@UStIHeO3PKg1gNS!vV?!tMJv-)hx5$!%jhbX0$-5Xhg-p|`# 
z6KZYJ#Dc#RjpK>NtQl3iSrOc%2j#^deMXlel+-sNxR(IX_BjcD8T&Rsg2-oLLrfTo zmazPZq*65A7^7wrWx6=W+zcxn;dNZY_ec?LZUyc~D Q*5-y4 V4wWuCoy+$)h_`r`sNuswWwvUv2xhTkcnBC-j7urrnRP3pJO|Y$81|TVsbAb z@LBG^GLZc|*~>4x$1mcO9i;Eapj=#F%=4X(hlgKiZ+oqu@#hylHiIuXh>%@4;G@+n z8*Xah=8%0*(A!&ry;pLGcF8w!y7DC$bJp8CT&n`tat>cI0A(U1fnzvAuiNq>U1)KN zMV*1eC#uCJAIaXK5vmd!)u&SqnYQI~niO|2WEZ~(5PfOpkXwiw4X$x1`AN6bvP}>5 zvato$i+Xd+s}S{bk~>8UI9V)@REn608k;7_F_=}yO~PH7Hdi`7zaQUmg4-ly{byJh zLL|0d4?I;|j{=j1Ym%Wd9s-T~hqeK#asXoGcA~avW>+pvnIkyRG;~L?hzk?SEj9iS zU$%P`eh@mjRc?jq7*i%U2xRET+70gK?&s2^wUwn*+Qu>n=Tu33p8-iRO{bD~ka3Vx zD)>%J(NH8*EsUOurc++a>gsQ!CYysV$bjM`d3_#E`W+Q4o({&CqiY3)66w?8~5b zHiLcFTgyIo&4Ec_Uaf(t({Babg2 +taA>X&vzbi zUx$vPvtu8%AeBQx2(naez9#gtPoK7v*c*GO*7L{>_P$Z%a5CI)>})JQ3uvA(I~K)> zoU!|gdHog6%^U61vv-xunO>tK@p_;>e;Pkz;Qa|kn7dmWRo|qAoRD*i^r5@cWt7}r zH&f*MX*JU#lOoF^t?~<{sPzP=1{$0134$?&cJc*x{ky32HVU6Eq%%e5`%Bd4d7_)g zs&}r5kcal^=&&R2_s7NOu$iO3S@e9Ih%Vqi; N%4lWV(%h7+=dp+{reUxn+@kTj!1rZrqKbWiWcBO3*Sr%50*3;t05v@4~WMLXI zF3Eb}mk?7@`SyG^f2KgU-f^d2tGxM|U~_X2Z?3ew%*ah$nk0XIyKjBnroXs)8UNc@ z!slhkB=gI2dKFkqp9;c6lVMGpdZN1xvfxF>q;(2UIx{=!6FJm!%g+f4NrHl(4MVH~ zHFbJAUmyct!d1LviY0`D_V-f&nB;)R*v71zT#{NxO?^M4Ir{nq7j{@C$TK~rPm4Ek zK$IFTQZGR7tH*C9Po>K($`80~^+z(GU{kithO<7FmnntRsPGh(&X`hD`)D;s7JQ}s z{HuIPEWb~HH_-Ewh-%Rfs^WnWAs^AnJH7JI3iYd)HzJ3R5`tL8Q;1~AI`_N0=c7w! 
z{0HSBxLzai)_cp;&GNE{b|@UN>tby)=DAK4;gdfM+nLOAmeb0C=(SK992_Qqm3xx; zPBV5x8D=6bh4RHP2n|^i^-X=lgNEY-%4ojJYR0=p#ukF5Xtl9#_XbBf&4<+QmNXBc zA|3eOP58=d9x-oiuwB@zaoJ?E8OKrL$k+oF!6&DNmBN)Qf{8dpO~T^u9-~9zS}fA# z$bY=q+J%1<5F}#7;R!9}FVFn3+leRTIr yrX7Wyh&{gRQJT65dkbKCA|1m zN;T>r5G@pCf!<{c6ZipcB-CAuK>h*Zv^aFPiawx@sI17UDlo}X)Oi*w0ZGzA2fV{~BghZaSiQ_YckMf~}*;N)uJ$tu3*o4fuxQ_!Qi#mJ-BV(rS zxbtxj&Uo`$ze;`1 EwUu-9%3j NragK2Z%zq-(ug_CowyOXRYgHthAm));(VF!cg_dGZNHzn~Q5ofD?Mw@G4r z7_}*`U|i(`&dHwYtPQ#AVPC!kpP?%Tx$CwTl0;liAaN5IDhNs|2nT2QTFf@2D1QZ6 z8RTN)-*h?YCnH}yczgAE47#KsvCGJ>c>Ho{3>o1U?9Oh)rrBY=1Ao7>4Iq5Eh>1}* z(}y&HM4`5%EWf$B4bz)Daw16~wrjw|mkoF8oIvJ|yzH;fJK#`~At3H=PoKcvRHEWl zL2)(RkzLZ;Qis%EpifT=`8EwRe{&NqU6JEi_kmy%!9s}s)5n_~J^VXXdT>=8#q7;R zV*5j(To|_DavH|lsqzYf+d?c;yhnMFo!+S~a&Da2o(FPxI2g!qdP@B~XN}F X9eE-B)qxoX6q__>c*a63cPOmR>>eot>%5-n78Mqs zTH(k8sm92z^FEA|42o{D;Pj1~??mo}G29Xvr_=TdAc14Uw*uK5Vs_IOyR8%-j|*Uu zuw8SFfR|!;joFn%0YA4?>wdW>Hz>oDhh#TlF->>8W=Z4ds8`cosHbUM^{=71i-Iv@ zbJYmQd~YijFj0DGSLl&-&c~VG*5`f|2E+VgrH7hfAx6ix{S|+ByIxXTM4v4K0y81w zn(pX|Io@4GU-vx_Il3gNj@)7tjEORE?8(k*BfXi*cGczpz6_vnl?xVzwO+_( t7}r7 zofiw^PWKY?H=FwrjD4=%;Wm4gmomTR%S+clFyb5&tiv ~-OJyUk^Rg0(sPp_#SsW}r--ectpmFqatr~dvrS$r#P zDhC40l`waLOA)MH>*FgJp3Ayd1p5jz(BDwhnFWWw1Jov<_NGx*r(YW!0Mr9GN+pA* z@IB=NROm|==#W)44f6ZTK8N&tjM*YC4&GQZ(D)XlMb%L$YxV7QkxC7Mp2ifEH2sK! 
zG}~uw#eS8Hkbc?y_;6N;BrU_cAK|PtJ(3%|3aT&>ixm<9LHM0G#ttB49OI5${KWUi ziz&wvyn{0 4J&OlSp521;d?({@?gm-Q@7olCclH3P=bcI zW6DIddjT9SX@OJlO1NR4;$H+MV3$?kQ_08I_aVrnnr~9|0pDTQhnXsfDx5o|b;z0f zpIG5=c;iG8?5%bW1%5T6fRrG17wzN1i@o8jAInz&_P1ek8YDuAX&MB=TzBYbH^X$C zVL|E;PjfJm3I!aKdqpWjyAN1o0v8BLlC $NYe+keUXD46H0W= zE2hU5Rnoo+^ygzzMriX5K)eCW4#OOqyo7~j|JsK^QEa71hg2!Ns((zqZvVg)>nktY z`#KXw*Yk@SR16X%DZ)W5K>8hL5~4O;G0jS++wj>+>xPp#U`;Rs+BtD~37Va-%0j;j zR^3ZqMI!>nv=LaGTO`=drN|p<0!SuITX~fL<;v85a^CvWOB+kw6Zg~WD+RGv+_0fA z)lna iG#6o0>OVHVgG^y|H8Kbpl80_ z|BhUz6F&k803vlCxrGxp2*2*Ku9Ska*kvNT5Z}9a5u1!+0gbioSDQ(smR>EqM-t6{ ztCY5{%s-Q@=XgOZ?$Ryl;&qI^b%IZ{i^J@t&>(cX#8_MY>nPY%5ie|HuW>~o1fp#6 zr3#fhEvasID1k)FQ$cfYpV2+it8RN}W{2U+nNzB_ZEMB0w#tW#*>?>|rta1-lNuN{ zmhxNol@Q4bwUc_e&k?_xFr^{CIOU@{W&;t6z$5fJ&N}(PD<3o4gzEHLNDkdlwA5MJ zs}o&UdCzSOrL4Pmr{ae6Y#%KL1y5m3nfFsfE#q+l{k{d6BO3N%Dk+%wh0!YL-o3be zney6VI`@~GF(gRqkd~OiqXIl|%77NkIwP@EFE-r~6j@Lce64aVv PJqsD9Z5$6D+?n*h?$iM0Qg%X z=wM{>gfBm-{4PFE`nmkv__sjVz*fSpNqulfnhH30*D zBfg6NK&GGZ>StU1yNP~}pyXue_#3NMws$glHs5clkb#5AZ{+)bXRc=mTiD#d{x4(5 z82niVfw+G6_wPFb_`PR;96C|R6N%LVVg_JVvw#Rf94!74{BEaaVeDw`@MKfq|L|o2 zaWHXmfPh@j4#LjD&ID!wva)mi|9$+op8u!W{J+()0iX5RxY(IMTp(5;JAj=P$OL+F zZx9!NjUC7YWaDIeTGv0NTu)qKE_P0iXTSf4oSl`G3CIfOdRjOOh>M96%nE+$<4Fa` z#>B Sp4BQa({Csp7|4CmOuRL|H6+}i4(L81fm5UJ%qc^svx9(E}j>?s6ddW zeQoDK&l`xVosBDLxbtvJ^<7hH6HH=rx&*}P%*Au=j1Y90%;k!IdmjF3?A979^!+sa z!OKwHTh%ov5FEbe@aMz{eSxMmyNu5ttf-l=Jcr#W1% # zw1#n;aYp!qZ&?VnyPs`b)<@MI{~cDD-Tf+P0$Ybc#cDM7Y_%4 XOYttTr?2Wv-Q3>{u2%>e8)_;k#e=%8} zA}yGM^Y6dUzvw0Z3M1-@CZ@j)|0l16 0>Q^n@DR0FT-ezB z4@j#S&=_x*1cZ`^I;CD<> np.ndarray: + return 1.0 / (1.0 + np.exp(-x)) + + +def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series: + # baseline heuristic: high velocity + low conversion + v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index)) + cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index)) + total = session_df.get("total_interactions", pd.Series(0, 
index=session_df.index)) + return (total >= 12) & (v >= 0.20) & (cr <= 0.01) + + +class CommercePlatform: + def __init__(self, product_catelogue_size: int, max_price: float, min_price: float, + constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None, + use_defense: bool = False): + self.product_catelogue_size = product_catelogue_size + self.max_price = max_price + self.min_price = min_price + self.constraints = constraints + self.use_defense = use_defense + self.agent_detector = agent_detector + self.simulation_history: List[Dict[str, Any]] = [] + self._rng = np.random.default_rng(constraints.seed) + self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size) + self._popularity = self._popularity / (self._popularity.mean() + 1e-12) + self._last_interaction_df: pd.DataFrame = pd.DataFrame() + + def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]: + # ground truth purchase propensities + p = np.clip(prices, self.min_price, self.max_price) + pn = p / self.max_price + human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity) + agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity) + return { + "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95), + "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95) + } + + def _session_markup_multiplier(self, signal_score: float) -> float: + # session-based COI markup based on demand signal expression + x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6) + return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0]) + + def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame: + demand = self.setup_true_demand(base_prices) + human_pprob = demand["human_purchase_prob"] + agent_pprob = demand["agent_purchase_prob"] + events: 
List[Dict[str, Any]] = [] + T = self.constraints.sessions_per_step + n_agent_sessions = int(round(T * self.constraints.agent_share)) + n_human_sessions = T - n_agent_sessions + + # human sessions: normal browse with possible purchase + for s in range(n_human_sessions): + session_id = f"h_{len(events)}_{s}" + k = int(self._rng.integers(1, 4)) + prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False) + t = 0.0 + inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k) + signal_score = 0.0 + purchased_any = False + + for i, pid in enumerate(prod_ids): + t += float(inter_times[i]) + price_shown = float(base_prices[pid]) + events.append({ + "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), + "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + signal_score += 1.0 + + if self._rng.random() < 0.35: + t += float(inter_times[i + k]) + events.append({ + "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), + "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + signal_score += 2.0 + + if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])): + t += float(inter_times[i + 2 * k]) + mult = self._session_markup_multiplier(signal_score) + price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price)) + events.append({ + "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), + "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1, + "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score, + }) + purchased_any = True + + # agent sessions: split recon/purchase to circumvent COI + n_agent_ids = max(1, n_agent_sessions // 2) + for a in range(n_agent_ids): + agent_id = 
f"a_{a}" + recon_session_id = f"{agent_id}_recon" + t = 0.0 + n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5 + inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1)) + prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views) + recon_signal = 0.0 + + for i, pid in enumerate(prod_ids): + t += float(inter_times[i]) + events.append({ + "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid), + "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + recon_signal += 1.0 + + # clean purchase session with minimal interactions + if self._rng.random() < self.constraints.agent_purchase_probability: + purchase_session_id = f"{agent_id}_clean" + pid = int(self._rng.integers(0, self.product_catelogue_size)) + t2 = 0.0 + clean_signal = 0.0 + t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) + events.append({ + "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, + "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + clean_signal += 1.0 + + if self._rng.random() < float(agent_pprob[pid]): + t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) + obs_mult = self._session_markup_multiplier(clean_signal) + obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price)) + oracle_mult = self._session_markup_multiplier(recon_signal) # oracle links recon->purchase + oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price)) + events.append({ + "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, + "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1, + "price_paid": obs_paid, "oracle_price_paid": 
oracle_paid, "signal_score": clean_signal, + }) + + return pd.DataFrame(events) + + def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]: + if interaction_df.empty: + return {"mean_sale_price": 0.0, "look_to_book": 0.0} + purchases = interaction_df[interaction_df["action"] == "purchase"] + mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0 + views = float((interaction_df["action"] == "view").sum()) + buys = float((interaction_df["action"] == "purchase").sum()) + return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))} + + def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame: + if df.empty: + return pd.DataFrame() + g = df.groupby("session_id", sort=False) + session_duration = g["t"].max() - g["t"].min() + total_interactions = g.size() + avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0) + interaction_velocity = total_interactions / (session_duration + 1e-6) + views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False) + cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False) + purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False) + conversion_rate = purchases / (views + 1e-6) + is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False) + + return pd.DataFrame({ + "session_duration_sec": session_duration.astype(float), + "avg_time_between_events": avg_time_between.astype(float), + "total_interactions": total_interactions.astype(int), + "interaction_velocity": interaction_velocity.astype(float), + "item_views": views.astype(int), + "cart_adds": cart_adds.astype(int), + "purchases": purchases.astype(int), + "conversion_rate": conversion_rate.astype(float), + "is_agent": is_agent.astype(bool), + }).reset_index() + + def demand_estimate(self, interaction_df: pd.DataFrame, 
exclude_sessions: Optional[pd.Series] = None) -> np.ndarray: + # proxy demand from weighted interaction events + if interaction_df.empty: + return np.zeros(self.product_catelogue_size, dtype=np.float32) + df = interaction_df + if exclude_sessions is not None: + bad_sessions = set(exclude_sessions.loc[exclude_sessions].index) + df = df[~df["session_id"].isin(bad_sessions)] + weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5} + w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float) + prod = df["product_id"].to_numpy(dtype=int) + q_hat = np.zeros(self.product_catelogue_size, dtype=float) + np.add.at(q_hat, prod, w) + return q_hat.astype(np.float32) + + def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]: + interaction_df = self._simulate_sessions(prices) + self._last_interaction_df = interaction_df + session_df = self._session_feature_table(interaction_df) + + predicted_agent_sessions = None + if (self.use_defense and self.agent_detector is not None and not session_df.empty): + predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id")) + + q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None) + q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \ + if predicted_agent_sessions is not None else q_hat_naive.copy() + + true_human = np.zeros(self.product_catelogue_size, dtype=float) + true_agent = np.zeros(self.product_catelogue_size, dtype=float) + if not interaction_df.empty: + purchases = interaction_df[interaction_df["action"] == "purchase"] + if not purchases.empty: + for _, r in purchases.iterrows(): + if r["actor"] == "human": + true_human[int(r["product_id"])] += 1.0 + else: + true_agent[int(r["product_id"])] += 1.0 + + revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0 + revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0 + agent_loss = max(0.0, 
revenue_oracle - revenue_observed) + + eps = 1e-6 + internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps) + internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps) + interaction_features = self.compute_interaction_features(interaction_df) + + summary = { + "prices": prices.copy(), + "interaction_df": interaction_df, + "session_df": session_df, + "q_hat_naive": q_hat_naive, + "q_hat_defended": q_hat_defended, + "true_human_demand": true_human.astype(np.float32), + "true_agent_purchases": true_agent.astype(np.float32), + "internal_error_naive": internal_error_naive.astype(np.float32), + "internal_error_defended": internal_error_def.astype(np.float32), + "interaction_features": interaction_features, + "revenue_observed": revenue_observed, + "revenue_oracle": revenue_oracle, + "agent_loss": agent_loss, + "predicted_agent_sessions": predicted_agent_sessions, + } + self.simulation_history.append(summary) + return summary + + def get_interaction_data(self) -> np.ndarray: + if self._last_interaction_df.empty: + return np.array([], dtype=object) + return self._last_interaction_df.to_dict(orient="records") + + +class PHANTOMEnv(gym.Env): + metadata = {"render_modes": []} + + def __init__(self, use_defense: bool = False): + super().__init__() + self.constraints = BusinessLogicConstraints() + self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment, + high=self.constraints.max_price_adjustment, + shape=(self.constraints.product_catelogue_size,), dtype=np.float32) + self.observation_space = spaces.Dict({ + "elasticity": spaces.Dict({ + "price": spaces.Box( + low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32), + high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32), + dtype=np.float32), + "demand": spaces.Box( + low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), + 
high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32), + dtype=np.float32), + }) + }) + self.commerce_platform = CommercePlatform( + product_catelogue_size=self.constraints.product_catelogue_size, + max_price=self.constraints.system_max_price, + min_price=self.constraints.system_min_price, + constraints=self.constraints, + agent_detector=simple_agent_detector, + use_defense=use_defense) + self._rng = np.random.default_rng(self.constraints.seed) + self.t = 0 + self._prev_prices: Optional[np.ndarray] = None + self.state: Dict[str, Any] = {} + + def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): + super().reset(seed=seed) + if seed is not None: + self._rng = np.random.default_rng(seed) + self.commerce_platform._rng = np.random.default_rng(seed) + self.t = 0 + init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32) + self._prev_prices = init_prices.copy() + self.state = { + "elasticity": { + "price": init_prices, + "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), + } + } + return self.state, {} + + def step(self, action: np.ndarray): + self.t += 1 + base_prices = self.state["elasticity"]["price"].astype(np.float32) + new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)), + self.constraints.system_min_price, + self.constraints.system_max_price).astype(np.float32) + result = self.commerce_platform.run_pricing_simulation(new_prices) + + if self.commerce_platform.use_defense: + demand_est = result["q_hat_defended"] + internal_err = result["internal_error_defended"] + else: + demand_est = result["q_hat_naive"] + internal_err = result["internal_error_naive"] + + self.state["elasticity"]["price"] = new_prices + self.state["elasticity"]["demand"] = demand_est + + volatility = 0.0 if self._prev_prices is None else \ + float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6)))) + 
self._prev_prices = new_prices.copy() + + revenue_observed = float(result["revenue_observed"]) + agent_loss = float(result["agent_loss"]) + err_mean = float(np.mean(internal_err)) + + reward = (revenue_observed + - self.constraints.w_agent_loss * agent_loss + - self.constraints.w_volatility * volatility + - self.constraints.w_estimation_error * err_mean) + + terminated = self.t >= self.constraints.episode_length + info = { + "t": self.t, + "revenue_observed": revenue_observed, + "revenue_oracle": float(result["revenue_oracle"]), + "agent_loss": agent_loss, + "ux_volatility": volatility, + "mean_internal_error": err_mean, + "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)), + "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)), + "true_human_purchases_total": float(np.sum(result["true_human_demand"])), + "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])), + } + return self.state, float(reward), terminated, False, info + + +if __name__ == "__main__": + import matplotlib.pyplot as plt + from collections import defaultdict + + runs = {} + for use_defense in (False, True): + env = PHANTOMEnv(use_defense=use_defense) + obs, _ = env.reset(seed=42) + metrics = defaultdict(list) + total_reward = 0.0 + done = False + + while not done: + action = env.action_space.sample() + obs, reward, done, _, info = env.step(action) + total_reward += reward + p_mean = float(np.mean(obs["elasticity"]["price"])) + q_mean = float(np.mean(obs["elasticity"]["demand"])) + p_std = float(np.std(obs["elasticity"]["price"])) + + metrics['t'].append(info['t']) + metrics['price_mean'].append(p_mean) + metrics['price_std'].append(p_std) + metrics['demand_mean'].append(q_mean) + metrics['revenue_observed'].append(info['revenue_observed']) + metrics['revenue_oracle'].append(info['revenue_oracle']) + metrics['agent_loss'].append(info['agent_loss']) + metrics['ux_volatility'].append(info['ux_volatility']) + 
metrics['look_to_book'].append(info['look_to_book']) + metrics['reward'].append(reward) + metrics['human_purchases'].append(info['true_human_purchases_total']) + metrics['agent_purchases'].append(info['true_agent_purchases_total']) + + if info['t'] % 20 == 0 or done: + print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} " + f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} " + f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} " + f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}") + + runs[use_defense] = metrics + print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n") + + fig, axes = plt.subplots(3, 3, figsize=(15, 12)) + fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold') + + plot_configs = [ + ('price_mean', 'Mean Price', 'Price'), + ('demand_mean', 'Mean Demand Estimate', 'Demand'), + ('revenue_observed', 'Revenue (Observed)', 'Revenue'), + ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'), + ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'), + ('look_to_book', 'Look-to-Book Ratio', 'Ratio'), + ('reward', 'Step Reward', 'Reward'), + ('human_purchases', 'Human Purchases', 'Count'), + ('agent_purchases', 'Agent Purchases', 'Count'), + ] + + for idx, (key, title, ylabel) in enumerate(plot_configs): + ax = axes[idx // 3, idx % 3] + for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]: + m = runs[use_defense] + ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5) + ax.set_xlabel('Step') + ax.set_ylabel(ylabel) + ax.set_title(title, fontsize=10, fontweight='bold') + ax.legend(loc='best', fontsize=8) + ax.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight') + print("Plot saved to phantom_env_comparison.png") + plt.show() From 
20132c084c0056de8a1d5f4019138cc2eba1829c Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sun, 14 Dec 2025 17:30:01 +0100 Subject: [PATCH 02/35] initial environemnt definitions --- sim/rl/environment.py | 471 +++++------------------------------------- 1 file changed, 50 insertions(+), 421 deletions(-) diff --git a/sim/rl/environment.py b/sim/rl/environment.py index 19f9ad4..803a4fd 100644 --- a/sim/rl/environment.py +++ b/sim/rl/environment.py @@ -2,450 +2,79 @@ import gymnasium as gym from gymnasium import spaces import numpy as np from dataclasses import dataclass -import pandas as pd -from typing import Callable, Optional, Dict, Any, List -# "learner" agent learning to optimize pricing -# "agent" part of environment creating demand signals that learner processes +# here when we say "learner" we mean the agent that is learning to optimize the pricing and "agent" is part of the envrionment where the agent is creating demand that that "learner" is processing" @dataclass class BusinessLogicConstraints(): - max_price_adjustment: float = 0.30 - system_max_price: float = 500.0 - system_min_price: float = 1.0 - product_catelogue_size: int = 100 - episode_length: int = 200 - sessions_per_step: int = 250 - agent_share: float = 0.25 - agent_recon_multiplier: float = 6.0 - agent_purchase_probability: float = 0.20 - coi_strength: float = 0.25 - coi_threshold: float = 4.0 - coi_sigmoid_temp: float = 1.25 - base_human_demand: float = 0.08 - base_agent_demand: float = 0.05 - human_price_elasticity: float = -1.2 - agent_price_elasticity: float = -0.6 - w_agent_loss: float = 1.0 - w_volatility: float = 5.0 - w_estimation_error: float = 0.25 - seed: int = 7 - - -def _sigmoid(x: np.ndarray) -> np.ndarray: - return 1.0 / (1.0 + np.exp(-x)) - - -def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series: - # baseline heuristic: high velocity + low conversion - v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index)) - cr = 
session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index)) - total = session_df.get("total_interactions", pd.Series(0, index=session_df.index)) - return (total >= 12) & (v >= 0.20) & (cr <= 0.01) - - -class CommercePlatform: - def __init__(self, product_catelogue_size: int, max_price: float, min_price: float, - constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None, - use_defense: bool = False): - self.product_catelogue_size = product_catelogue_size - self.max_price = max_price - self.min_price = min_price - self.constraints = constraints - self.use_defense = use_defense - self.agent_detector = agent_detector - self.simulation_history: List[Dict[str, Any]] = [] - self._rng = np.random.default_rng(constraints.seed) - self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size) - self._popularity = self._popularity / (self._popularity.mean() + 1e-12) - self._last_interaction_df: pd.DataFrame = pd.DataFrame() - - def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]: - # ground truth purchase propensities - p = np.clip(prices, self.min_price, self.max_price) - pn = p / self.max_price - human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity) - agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity) - return { - "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95), - "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95) - } - - def _session_markup_multiplier(self, signal_score: float) -> float: - # session-based COI markup based on demand signal expression - x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6) - return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0]) - - def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame: - demand = 
self.setup_true_demand(base_prices) - human_pprob = demand["human_purchase_prob"] - agent_pprob = demand["agent_purchase_prob"] - events: List[Dict[str, Any]] = [] - T = self.constraints.sessions_per_step - n_agent_sessions = int(round(T * self.constraints.agent_share)) - n_human_sessions = T - n_agent_sessions - - # human sessions: normal browse with possible purchase - for s in range(n_human_sessions): - session_id = f"h_{len(events)}_{s}" - k = int(self._rng.integers(1, 4)) - prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False) - t = 0.0 - inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k) - signal_score = 0.0 - purchased_any = False - - for i, pid in enumerate(prod_ids): - t += float(inter_times[i]) - price_shown = float(base_prices[pid]) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - signal_score += 1.0 - - if self._rng.random() < 0.35: - t += float(inter_times[i + k]) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - signal_score += 2.0 - - if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])): - t += float(inter_times[i + 2 * k]) - mult = self._session_markup_multiplier(signal_score) - price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price)) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1, - "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score, - }) - purchased_any = True - - # agent 
sessions: split recon/purchase to circumvent COI - n_agent_ids = max(1, n_agent_sessions // 2) - for a in range(n_agent_ids): - agent_id = f"a_{a}" - recon_session_id = f"{agent_id}_recon" - t = 0.0 - n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5 - inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1)) - prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views) - recon_signal = 0.0 - - for i, pid in enumerate(prod_ids): - t += float(inter_times[i]) - events.append({ - "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid), - "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - recon_signal += 1.0 - - # clean purchase session with minimal interactions - if self._rng.random() < self.constraints.agent_purchase_probability: - purchase_session_id = f"{agent_id}_clean" - pid = int(self._rng.integers(0, self.product_catelogue_size)) - t2 = 0.0 - clean_signal = 0.0 - t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) - events.append({ - "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, - "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - clean_signal += 1.0 - - if self._rng.random() < float(agent_pprob[pid]): - t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) - obs_mult = self._session_markup_multiplier(clean_signal) - obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price)) - oracle_mult = self._session_markup_multiplier(recon_signal) # oracle links recon->purchase - oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price)) - events.append({ - "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, - 
"action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1, - "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal, - }) - - return pd.DataFrame(events) - - def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]: - if interaction_df.empty: - return {"mean_sale_price": 0.0, "look_to_book": 0.0} - purchases = interaction_df[interaction_df["action"] == "purchase"] - mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0 - views = float((interaction_df["action"] == "view").sum()) - buys = float((interaction_df["action"] == "purchase").sum()) - return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))} - - def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame: - if df.empty: - return pd.DataFrame() - g = df.groupby("session_id", sort=False) - session_duration = g["t"].max() - g["t"].min() - total_interactions = g.size() - avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0) - interaction_velocity = total_interactions / (session_duration + 1e-6) - views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False) - cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False) - purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False) - conversion_rate = purchases / (views + 1e-6) - is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False) - - return pd.DataFrame({ - "session_duration_sec": session_duration.astype(float), - "avg_time_between_events": avg_time_between.astype(float), - "total_interactions": total_interactions.astype(int), - "interaction_velocity": interaction_velocity.astype(float), - "item_views": views.astype(int), - "cart_adds": cart_adds.astype(int), - "purchases": purchases.astype(int), - "conversion_rate": 
conversion_rate.astype(float), - "is_agent": is_agent.astype(bool), - }).reset_index() - - def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray: - # proxy demand from weighted interaction events - if interaction_df.empty: - return np.zeros(self.product_catelogue_size, dtype=np.float32) - df = interaction_df - if exclude_sessions is not None: - bad_sessions = set(exclude_sessions.loc[exclude_sessions].index) - df = df[~df["session_id"].isin(bad_sessions)] - weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5} - w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float) - prod = df["product_id"].to_numpy(dtype=int) - q_hat = np.zeros(self.product_catelogue_size, dtype=float) - np.add.at(q_hat, prod, w) - return q_hat.astype(np.float32) - - def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]: - interaction_df = self._simulate_sessions(prices) - self._last_interaction_df = interaction_df - session_df = self._session_feature_table(interaction_df) - - predicted_agent_sessions = None - if (self.use_defense and self.agent_detector is not None and not session_df.empty): - predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id")) - - q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None) - q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \ - if predicted_agent_sessions is not None else q_hat_naive.copy() - - true_human = np.zeros(self.product_catelogue_size, dtype=float) - true_agent = np.zeros(self.product_catelogue_size, dtype=float) - if not interaction_df.empty: - purchases = interaction_df[interaction_df["action"] == "purchase"] - if not purchases.empty: - for _, r in purchases.iterrows(): - if r["actor"] == "human": - true_human[int(r["product_id"])] += 1.0 - else: - true_agent[int(r["product_id"])] += 1.0 - - revenue_observed = float(interaction_df["price_paid"].sum()) if not 
interaction_df.empty else 0.0 - revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0 - agent_loss = max(0.0, revenue_oracle - revenue_observed) - - eps = 1e-6 - internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps) - internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps) - interaction_features = self.compute_interaction_features(interaction_df) - - summary = { - "prices": prices.copy(), - "interaction_df": interaction_df, - "session_df": session_df, - "q_hat_naive": q_hat_naive, - "q_hat_defended": q_hat_defended, - "true_human_demand": true_human.astype(np.float32), - "true_agent_purchases": true_agent.astype(np.float32), - "internal_error_naive": internal_error_naive.astype(np.float32), - "internal_error_defended": internal_error_def.astype(np.float32), - "interaction_features": interaction_features, - "revenue_observed": revenue_observed, - "revenue_oracle": revenue_oracle, - "agent_loss": agent_loss, - "predicted_agent_sessions": predicted_agent_sessions, - } - self.simulation_history.append(summary) - return summary - - def get_interaction_data(self) -> np.ndarray: - if self._last_interaction_df.empty: - return np.array([], dtype=object) - return self._last_interaction_df.to_dict(orient="records") + max_price_adjustment : float = 0.3 # maximum adjustment of price + system_max_price : float = 500.0 # maximum price allowed in the system + product_catelogue_size : int = 100 # number of products in the catalogue class PHANTOMEnv(gym.Env): - metadata = {"render_modes": []} - - def __init__(self, use_defense: bool = False): - super().__init__() + def __init__(self): + super(PHANTOMEnv, self).__init__() self.constraints = BusinessLogicConstraints() - self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment, - high=self.constraints.max_price_adjustment, - shape=(self.constraints.product_catelogue_size,), dtype=np.float32) + self.action_space = spaces.Box( + 
low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment, + shape=(1,), dtype=np.float32) # we allow teh learner to adjust price by some BusinessLogicConstraints factor + # Example for using image as input: self.observation_space = spaces.Dict({ - "elasticity": spaces.Dict({ - "price": spaces.Box( - low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32), - high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32), - dtype=np.float32), - "demand": spaces.Box( - low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), - high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32), - dtype=np.float32), + 'elasticity': spaces.Dict({ + 'price': spaces.Box(low=0, high=self.constraints.system_max_price, + shape=(self.constraints.product_catelogue_size,), dtype=np.float32), + 'demand': spaces.Box(low=0, high=np.inf, + shape=(self.constraints.product_catelogue_size,), dtype=np.float32) }) }) - self.commerce_platform = CommercePlatform( - product_catelogue_size=self.constraints.product_catelogue_size, - max_price=self.constraints.system_max_price, - min_price=self.constraints.system_min_price, - constraints=self.constraints, - agent_detector=simple_agent_detector, - use_defense=use_defense) - self._rng = np.random.default_rng(self.constraints.seed) - self.t = 0 - self._prev_prices: Optional[np.ndarray] = None - self.state: Dict[str, Any] = {} - def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, seed=None, options=None): super().reset(seed=seed) - if seed is not None: - self._rng = np.random.default_rng(seed) - self.commerce_platform._rng = np.random.default_rng(seed) - self.t = 0 - init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32) - self._prev_prices = init_prices.copy() + # Initialize 
state self.state = { - "elasticity": { - "price": init_prices, - "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), - } + 'price': 100.0, # base price + 'demand': 0.0 } return self.state, {} - def step(self, action: np.ndarray): - self.t += 1 - base_prices = self.state["elasticity"]["price"].astype(np.float32) - new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)), - self.constraints.system_min_price, - self.constraints.system_max_price).astype(np.float32) - result = self.commerce_platform.run_pricing_simulation(new_prices) + def step(self, action): + # Apply action + price_adjustment = action[0] + new_price = self.state['price'] * (1 + price_adjustment) + self.state['price'] = new_price - if self.commerce_platform.use_defense: - demand_est = result["q_hat_defended"] - internal_err = result["internal_error_defended"] - else: - demand_est = result["q_hat_naive"] - internal_err = result["internal_error_naive"] + # Simulate demand based on new price + demand = self.simulate_demand(new_price) + self.state['demand'] = demand - self.state["elasticity"]["price"] = new_prices - self.state["elasticity"]["demand"] = demand_est + # Calculate reward (e.g., revenue) + reward = new_price * demand - volatility = 0.0 if self._prev_prices is None else \ - float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6)))) - self._prev_prices = new_prices.copy() + # Check if episode is done + done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0 - revenue_observed = float(result["revenue_observed"]) - agent_loss = float(result["agent_loss"]) - err_mean = float(np.mean(internal_err)) - - reward = (revenue_observed - - self.constraints.w_agent_loss * agent_loss - - self.constraints.w_volatility * volatility - - self.constraints.w_estimation_error * err_mean) - - terminated = self.t >= self.constraints.episode_length - info = { - "t": self.t, - "revenue_observed": revenue_observed, - "revenue_oracle": 
float(result["revenue_oracle"]), - "agent_loss": agent_loss, - "ux_volatility": volatility, - "mean_internal_error": err_mean, - "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)), - "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)), - "true_human_purchases_total": float(np.sum(result["true_human_demand"])), - "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])), - } - return self.state, float(reward), terminated, False, info + return self.state, reward, done, False, {} + def simulate_demand(self, price): + # Simple linear demand model: demand decreases as price increases + base_demand = 200 + price_sensitivity = 0.5 + demand = max(0, base_demand - price_sensitivity * price) + return demand if __name__ == "__main__": - import matplotlib.pyplot as plt - from collections import defaultdict + env = PHANTOMEnv() + obs, _ = env.reset() + done = False + total_reward = 0 - runs = {} - for use_defense in (False, True): - env = PHANTOMEnv(use_defense=use_defense) - obs, _ = env.reset(seed=42) - metrics = defaultdict(list) - total_reward = 0.0 - done = False + while not done: + action = env.action_space.sample() # Random action + obs, reward, done, _, _ = env.step(action) + total_reward += reward + print(f"Price: {obs['price']:.2f}, Demand: {obs['demand']:.2f}, Reward: {reward:.2f}") + if done: + break - while not done: - action = env.action_space.sample() - obs, reward, done, _, info = env.step(action) - total_reward += reward - p_mean = float(np.mean(obs["elasticity"]["price"])) - q_mean = float(np.mean(obs["elasticity"]["demand"])) - p_std = float(np.std(obs["elasticity"]["price"])) - - metrics['t'].append(info['t']) - metrics['price_mean'].append(p_mean) - metrics['price_std'].append(p_std) - metrics['demand_mean'].append(q_mean) - metrics['revenue_observed'].append(info['revenue_observed']) - metrics['revenue_oracle'].append(info['revenue_oracle']) - 
metrics['agent_loss'].append(info['agent_loss']) - metrics['ux_volatility'].append(info['ux_volatility']) - metrics['look_to_book'].append(info['look_to_book']) - metrics['reward'].append(reward) - metrics['human_purchases'].append(info['true_human_purchases_total']) - metrics['agent_purchases'].append(info['true_agent_purchases_total']) - - if info['t'] % 20 == 0 or done: - print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} " - f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} " - f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} " - f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}") - - runs[use_defense] = metrics - print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n") - - fig, axes = plt.subplots(3, 3, figsize=(15, 12)) - fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold') - - plot_configs = [ - ('price_mean', 'Mean Price', 'Price'), - ('demand_mean', 'Mean Demand Estimate', 'Demand'), - ('revenue_observed', 'Revenue (Observed)', 'Revenue'), - ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'), - ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'), - ('look_to_book', 'Look-to-Book Ratio', 'Ratio'), - ('reward', 'Step Reward', 'Reward'), - ('human_purchases', 'Human Purchases', 'Count'), - ('agent_purchases', 'Agent Purchases', 'Count'), - ] - - for idx, (key, title, ylabel) in enumerate(plot_configs): - ax = axes[idx // 3, idx % 3] - for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]: - m = runs[use_defense] - ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5) - ax.set_xlabel('Step') - ax.set_ylabel(ylabel) - ax.set_title(title, fontsize=10, fontweight='bold') - ax.legend(loc='best', fontsize=8) - ax.grid(True, alpha=0.3) - - plt.tight_layout() - plt.savefig('phantom_env_comparison.png', dpi=150, 
bbox_inches='tight') - print("Plot saved to phantom_env_comparison.png") - plt.show() + print(f"Total Reward: {total_reward:.2f}") From 7d09232e48072598e7bf7bab46749fba47f00720 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sun, 14 Dec 2025 17:53:48 +0100 Subject: [PATCH 03/35] high level defintion --- sim/rl/environment.py | 94 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 11 deletions(-) diff --git a/sim/rl/environment.py b/sim/rl/environment.py index 803a4fd..a09438f 100644 --- a/sim/rl/environment.py +++ b/sim/rl/environment.py @@ -2,6 +2,7 @@ import gymnasium as gym from gymnasium import spaces import numpy as np from dataclasses import dataclass +import pandas as pd # here when we say "learner" we mean the agent that is learning to optimize the pricing and "agent" is part of the envrionment where the agent is creating demand that that "learner" is processing" @@ -9,17 +10,89 @@ from dataclasses import dataclass class BusinessLogicConstraints(): max_price_adjustment : float = 0.3 # maximum adjustment of price system_max_price : float = 500.0 # maximum price allowed in the system + system_min_price : float = 1.0 # minimum price allowed in the system product_catelogue_size : int = 100 # number of products in the catalogue +class CommercePlatform: + def __init__(self, product_catelogue_size: int, max_price: float, min_price: float): + self.product_catelogue_size = product_catelogue_size + self.max_price = max_price + self.min_price = min_price + self.simulation_history = [] + + + def setup_true_demand(self,prices: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + human_price_elasticity = -1.5 # Example elasticity value + base_demand = 100 # Base demand for products + demand = base_demand * (prices / self.max_price) ** human_price_elasticity + + agent_price_elasticity = -2.0 # Example elasticity value for agents + agent_base_demand = 150 # Base demand for agents + agent_demand = agent_base_demand * (prices / self.max_price) ** 
agent_price_elasticity + + return demand + agent_demand, agent_demand + + + def compute_interaction_features(self, interaction_data: np.ndarray) -> dict: + df = pd.DataFrame(interaction_data) + return { + 'mean_sale_price': df[df['action'] == 'purchase']['price'].mean(), + } + + def run_pricing_simulation(self, prices: np.ndarray) -> np.ndarray: + # Simulate demand based on prices + + observed_demand, demand_from_agents = self.setup_true_demand(prices) + true_demand = observed_demand - demand_from_agents + + interaction_data = self.get_interaction_data() + interaction_features = self.compute_interaction_features(interaction_data) + demand_estimates = self.demand_estimate(interaction_data) + internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6) + + self.simulation_history.append( + { + 'prices': prices, + 'true_demand': true_demand, + 'demand_estimates': demand_estimates, + 'internal_error': internal_error, + 'interaction_data': interaction_data, + 'interaction_features': interaction_features + }) + return np.array(interaction_data) + + def get_interaction_data(self) -> np.ndarray: + # Simulate interaction data + interaction_data = [] + return np.array(interaction_data) + + + def demand_estimate(self, interactions : np.ndarray) -> np.ndarray: + demand_estimates = np.random.rand(self.product_catelogue_size) * 100 # Dummy demand estimates + return demand_estimates + + + + + + + + + class PHANTOMEnv(gym.Env): def __init__(self): super(PHANTOMEnv, self).__init__() self.constraints = BusinessLogicConstraints() self.action_space = spaces.Box( low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment, - shape=(1,), dtype=np.float32) # we allow teh learner to adjust price by some BusinessLogicConstraints factor + shape=(self.constraints.product_catelogue_size,), dtype=np.float32) # we allow teh learner to adjust price by some BusinessLogicConstraints factor # Example for using image as input: + self.commerce_platform = 
CommercePlatform( + product_catelogue_size=self.constraints.product_catelogue_size, + max_price=self.constraints.system_max_price, + min_price=self.constraints.system_min_price + ) self.observation_space = spaces.Dict({ 'elasticity': spaces.Dict({ 'price': spaces.Box(low=0, high=self.constraints.system_max_price, @@ -29,24 +102,23 @@ class PHANTOMEnv(gym.Env): }) }) - def reset(self, seed=None, options=None): + def reset(self, seed :int, options) -> tuple[dict, dict]: super().reset(seed=seed) # Initialize state self.state = { - 'price': 100.0, # base price - 'demand': 0.0 + 'elasticity': { + 'price': np.full((self.constraints.product_catelogue_size,), 100.0, dtype=np.float32), + 'demand': np.full((self.constraints.product_catelogue_size,), 50.0, dtype=np.float32) + } } return self.state, {} def step(self, action): - # Apply action - price_adjustment = action[0] - new_price = self.state['price'] * (1 + price_adjustment) - self.state['price'] = new_price + self.state['price'] = np.clip(self.state['price'] * (1 + action), + self.constraints.system_min_price, + self.constraints.system_max_price) + - # Simulate demand based on new price - demand = self.simulate_demand(new_price) - self.state['demand'] = demand # Calculate reward (e.g., revenue) reward = new_price * demand From 8a084584786bfbaff6b913ccf7d8af3c0804349d Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sun, 14 Dec 2025 17:59:34 +0100 Subject: [PATCH 04/35] formlating the reward simply --- sim/rl/environment.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sim/rl/environment.py b/sim/rl/environment.py index a09438f..ca7159b 100644 --- a/sim/rl/environment.py +++ b/sim/rl/environment.py @@ -40,7 +40,7 @@ class CommercePlatform: 'mean_sale_price': df[df['action'] == 'purchase']['price'].mean(), } - def run_pricing_simulation(self, prices: np.ndarray) -> np.ndarray: + def run_pricing_simulation(self, prices: np.ndarray) -> dict: # Simulate demand based on 
prices observed_demand, demand_from_agents = self.setup_true_demand(prices) @@ -51,16 +51,17 @@ class CommercePlatform: demand_estimates = self.demand_estimate(interaction_data) internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6) - self.simulation_history.append( - { + + summary = { 'prices': prices, 'true_demand': true_demand, 'demand_estimates': demand_estimates, 'internal_error': internal_error, 'interaction_data': interaction_data, 'interaction_features': interaction_features - }) - return np.array(interaction_data) + } + self.simulation_history.append(summary) + return summary def get_interaction_data(self) -> np.ndarray: # Simulate interaction data @@ -118,10 +119,24 @@ class PHANTOMEnv(gym.Env): self.constraints.system_min_price, self.constraints.system_max_price) + result = self.commerce_platform.run_pricing_simulation(self.state['price']) + history = self.commerce_platform.simulation_history + self.state['demand'] = result['demand_estimates'] + + + + reward = sum( + self.state['price'] * self.state['demand'], + # performance historically, to take into account business kpi trends (using features from interaction data) + sum( + [-0.05 * i * history[-1]['internal_error'] for i in range(1, len(history))], + ) if len(history) > 1 else 0, + sum( + [0.1 * history[-1]['interaction_features']['mean_sale_price'] - 0.1 * history[i]['interaction_features']['mean_sale_price'] for i in range(len(history)-1)], + ) if len(history) > 1 else 0 + ) - # Calculate reward (e.g., revenue) - reward = new_price * demand # Check if episode is done done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0 From 201c98bcacd2420ed45c52a583383618b26d46dc Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sun, 14 Dec 2025 18:59:02 +0100 Subject: [PATCH 05/35] improved implementation --- sim/rl/environment.py | 512 ++++++++++++++++++++++++++++++++---------- 1 file changed, 398 insertions(+), 114 deletions(-) diff --git a/sim/rl/environment.py 
b/sim/rl/environment.py index ca7159b..19f9ad4 100644 --- a/sim/rl/environment.py +++ b/sim/rl/environment.py @@ -3,165 +3,449 @@ from gymnasium import spaces import numpy as np from dataclasses import dataclass import pandas as pd +from typing import Callable, Optional, Dict, Any, List -# here when we say "learner" we mean the agent that is learning to optimize the pricing and "agent" is part of the envrionment where the agent is creating demand that that "learner" is processing" +# "learner" agent learning to optimize pricing +# "agent" part of environment creating demand signals that learner processes @dataclass class BusinessLogicConstraints(): - max_price_adjustment : float = 0.3 # maximum adjustment of price - system_max_price : float = 500.0 # maximum price allowed in the system - system_min_price : float = 1.0 # minimum price allowed in the system - product_catelogue_size : int = 100 # number of products in the catalogue + max_price_adjustment: float = 0.30 + system_max_price: float = 500.0 + system_min_price: float = 1.0 + product_catelogue_size: int = 100 + episode_length: int = 200 + sessions_per_step: int = 250 + agent_share: float = 0.25 + agent_recon_multiplier: float = 6.0 + agent_purchase_probability: float = 0.20 + coi_strength: float = 0.25 + coi_threshold: float = 4.0 + coi_sigmoid_temp: float = 1.25 + base_human_demand: float = 0.08 + base_agent_demand: float = 0.05 + human_price_elasticity: float = -1.2 + agent_price_elasticity: float = -0.6 + w_agent_loss: float = 1.0 + w_volatility: float = 5.0 + w_estimation_error: float = 0.25 + seed: int = 7 + + +def _sigmoid(x: np.ndarray) -> np.ndarray: + return 1.0 / (1.0 + np.exp(-x)) + + +def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series: + # baseline heuristic: high velocity + low conversion + v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index)) + cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index)) + total = 
session_df.get("total_interactions", pd.Series(0, index=session_df.index)) + return (total >= 12) & (v >= 0.20) & (cr <= 0.01) class CommercePlatform: - def __init__(self, product_catelogue_size: int, max_price: float, min_price: float): + def __init__(self, product_catelogue_size: int, max_price: float, min_price: float, + constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None, + use_defense: bool = False): self.product_catelogue_size = product_catelogue_size self.max_price = max_price self.min_price = min_price - self.simulation_history = [] + self.constraints = constraints + self.use_defense = use_defense + self.agent_detector = agent_detector + self.simulation_history: List[Dict[str, Any]] = [] + self._rng = np.random.default_rng(constraints.seed) + self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size) + self._popularity = self._popularity / (self._popularity.mean() + 1e-12) + self._last_interaction_df: pd.DataFrame = pd.DataFrame() - - def setup_true_demand(self,prices: np.ndarray) -> tuple[np.ndarray, np.ndarray]: - human_price_elasticity = -1.5 # Example elasticity value - base_demand = 100 # Base demand for products - demand = base_demand * (prices / self.max_price) ** human_price_elasticity - - agent_price_elasticity = -2.0 # Example elasticity value for agents - agent_base_demand = 150 # Base demand for agents - agent_demand = agent_base_demand * (prices / self.max_price) ** agent_price_elasticity - - return demand + agent_demand, agent_demand - - - def compute_interaction_features(self, interaction_data: np.ndarray) -> dict: - df = pd.DataFrame(interaction_data) + def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]: + # ground truth purchase propensities + p = np.clip(prices, self.min_price, self.max_price) + pn = p / self.max_price + human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity) + 
agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity) return { - 'mean_sale_price': df[df['action'] == 'purchase']['price'].mean(), + "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95), + "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95) } - def run_pricing_simulation(self, prices: np.ndarray) -> dict: - # Simulate demand based on prices + def _session_markup_multiplier(self, signal_score: float) -> float: + # session-based COI markup based on demand signal expression + x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6) + return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0]) - observed_demand, demand_from_agents = self.setup_true_demand(prices) - true_demand = observed_demand - demand_from_agents + def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame: + demand = self.setup_true_demand(base_prices) + human_pprob = demand["human_purchase_prob"] + agent_pprob = demand["agent_purchase_prob"] + events: List[Dict[str, Any]] = [] + T = self.constraints.sessions_per_step + n_agent_sessions = int(round(T * self.constraints.agent_share)) + n_human_sessions = T - n_agent_sessions - interaction_data = self.get_interaction_data() - interaction_features = self.compute_interaction_features(interaction_data) - demand_estimates = self.demand_estimate(interaction_data) - internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6) + # human sessions: normal browse with possible purchase + for s in range(n_human_sessions): + session_id = f"h_{len(events)}_{s}" + k = int(self._rng.integers(1, 4)) + prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False) + t = 0.0 + inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k) + signal_score = 0.0 + purchased_any = False + for i, pid in enumerate(prod_ids): + t += float(inter_times[i]) + price_shown = 
float(base_prices[pid]) + events.append({ + "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), + "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + signal_score += 1.0 + + if self._rng.random() < 0.35: + t += float(inter_times[i + k]) + events.append({ + "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), + "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + signal_score += 2.0 + + if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])): + t += float(inter_times[i + 2 * k]) + mult = self._session_markup_multiplier(signal_score) + price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price)) + events.append({ + "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), + "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1, + "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score, + }) + purchased_any = True + + # agent sessions: split recon/purchase to circumvent COI + n_agent_ids = max(1, n_agent_sessions // 2) + for a in range(n_agent_ids): + agent_id = f"a_{a}" + recon_session_id = f"{agent_id}_recon" + t = 0.0 + n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5 + inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1)) + prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views) + recon_signal = 0.0 + + for i, pid in enumerate(prod_ids): + t += float(inter_times[i]) + events.append({ + "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid), + "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 
0.0, "signal_score": 0.0, + }) + recon_signal += 1.0 + + # clean purchase session with minimal interactions + if self._rng.random() < self.constraints.agent_purchase_probability: + purchase_session_id = f"{agent_id}_clean" + pid = int(self._rng.integers(0, self.product_catelogue_size)) + t2 = 0.0 + clean_signal = 0.0 + t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) + events.append({ + "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, + "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0, + "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, + }) + clean_signal += 1.0 + + if self._rng.random() < float(agent_pprob[pid]): + t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) + obs_mult = self._session_markup_multiplier(clean_signal) + obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price)) + oracle_mult = self._session_markup_multiplier(recon_signal) # oracle links recon->purchase + oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price)) + events.append({ + "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, + "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1, + "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal, + }) + + return pd.DataFrame(events) + + def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]: + if interaction_df.empty: + return {"mean_sale_price": 0.0, "look_to_book": 0.0} + purchases = interaction_df[interaction_df["action"] == "purchase"] + mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0 + views = float((interaction_df["action"] == "view").sum()) + buys = float((interaction_df["action"] == "purchase").sum()) + return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))} + + def 
_session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame: + if df.empty: + return pd.DataFrame() + g = df.groupby("session_id", sort=False) + session_duration = g["t"].max() - g["t"].min() + total_interactions = g.size() + avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0) + interaction_velocity = total_interactions / (session_duration + 1e-6) + views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False) + cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False) + purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False) + conversion_rate = purchases / (views + 1e-6) + is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False) + + return pd.DataFrame({ + "session_duration_sec": session_duration.astype(float), + "avg_time_between_events": avg_time_between.astype(float), + "total_interactions": total_interactions.astype(int), + "interaction_velocity": interaction_velocity.astype(float), + "item_views": views.astype(int), + "cart_adds": cart_adds.astype(int), + "purchases": purchases.astype(int), + "conversion_rate": conversion_rate.astype(float), + "is_agent": is_agent.astype(bool), + }).reset_index() + + def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray: + # proxy demand from weighted interaction events + if interaction_df.empty: + return np.zeros(self.product_catelogue_size, dtype=np.float32) + df = interaction_df + if exclude_sessions is not None: + bad_sessions = set(exclude_sessions.loc[exclude_sessions].index) + df = df[~df["session_id"].isin(bad_sessions)] + weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5} + w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float) + prod = df["product_id"].to_numpy(dtype=int) + q_hat = np.zeros(self.product_catelogue_size, dtype=float) + np.add.at(q_hat, prod, w) + 
return q_hat.astype(np.float32) + + def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]: + interaction_df = self._simulate_sessions(prices) + self._last_interaction_df = interaction_df + session_df = self._session_feature_table(interaction_df) + + predicted_agent_sessions = None + if (self.use_defense and self.agent_detector is not None and not session_df.empty): + predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id")) + + q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None) + q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \ + if predicted_agent_sessions is not None else q_hat_naive.copy() + + true_human = np.zeros(self.product_catelogue_size, dtype=float) + true_agent = np.zeros(self.product_catelogue_size, dtype=float) + if not interaction_df.empty: + purchases = interaction_df[interaction_df["action"] == "purchase"] + if not purchases.empty: + for _, r in purchases.iterrows(): + if r["actor"] == "human": + true_human[int(r["product_id"])] += 1.0 + else: + true_agent[int(r["product_id"])] += 1.0 + + revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0 + revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0 + agent_loss = max(0.0, revenue_oracle - revenue_observed) + + eps = 1e-6 + internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps) + internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps) + interaction_features = self.compute_interaction_features(interaction_df) summary = { - 'prices': prices, - 'true_demand': true_demand, - 'demand_estimates': demand_estimates, - 'internal_error': internal_error, - 'interaction_data': interaction_data, - 'interaction_features': interaction_features - } + "prices": prices.copy(), + "interaction_df": interaction_df, + "session_df": session_df, + "q_hat_naive": q_hat_naive, + 
"q_hat_defended": q_hat_defended, + "true_human_demand": true_human.astype(np.float32), + "true_agent_purchases": true_agent.astype(np.float32), + "internal_error_naive": internal_error_naive.astype(np.float32), + "internal_error_defended": internal_error_def.astype(np.float32), + "interaction_features": interaction_features, + "revenue_observed": revenue_observed, + "revenue_oracle": revenue_oracle, + "agent_loss": agent_loss, + "predicted_agent_sessions": predicted_agent_sessions, + } self.simulation_history.append(summary) return summary def get_interaction_data(self) -> np.ndarray: - # Simulate interaction data - interaction_data = [] - return np.array(interaction_data) - - - def demand_estimate(self, interactions : np.ndarray) -> np.ndarray: - demand_estimates = np.random.rand(self.product_catelogue_size) * 100 # Dummy demand estimates - return demand_estimates - - - - - - - + if self._last_interaction_df.empty: + return np.array([], dtype=object) + return self._last_interaction_df.to_dict(orient="records") class PHANTOMEnv(gym.Env): - def __init__(self): - super(PHANTOMEnv, self).__init__() + metadata = {"render_modes": []} + + def __init__(self, use_defense: bool = False): + super().__init__() self.constraints = BusinessLogicConstraints() - self.action_space = spaces.Box( - low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment, - shape=(self.constraints.product_catelogue_size,), dtype=np.float32) # we allow teh learner to adjust price by some BusinessLogicConstraints factor - # Example for using image as input: + self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment, + high=self.constraints.max_price_adjustment, + shape=(self.constraints.product_catelogue_size,), dtype=np.float32) + self.observation_space = spaces.Dict({ + "elasticity": spaces.Dict({ + "price": spaces.Box( + low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32), + 
high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32), + dtype=np.float32), + "demand": spaces.Box( + low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), + high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32), + dtype=np.float32), + }) + }) self.commerce_platform = CommercePlatform( product_catelogue_size=self.constraints.product_catelogue_size, max_price=self.constraints.system_max_price, - min_price=self.constraints.system_min_price - ) - self.observation_space = spaces.Dict({ - 'elasticity': spaces.Dict({ - 'price': spaces.Box(low=0, high=self.constraints.system_max_price, - shape=(self.constraints.product_catelogue_size,), dtype=np.float32), - 'demand': spaces.Box(low=0, high=np.inf, - shape=(self.constraints.product_catelogue_size,), dtype=np.float32) - }) - }) + min_price=self.constraints.system_min_price, + constraints=self.constraints, + agent_detector=simple_agent_detector, + use_defense=use_defense) + self._rng = np.random.default_rng(self.constraints.seed) + self.t = 0 + self._prev_prices: Optional[np.ndarray] = None + self.state: Dict[str, Any] = {} - def reset(self, seed :int, options) -> tuple[dict, dict]: + def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) - # Initialize state + if seed is not None: + self._rng = np.random.default_rng(seed) + self.commerce_platform._rng = np.random.default_rng(seed) + self.t = 0 + init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32) + self._prev_prices = init_prices.copy() self.state = { - 'elasticity': { - 'price': np.full((self.constraints.product_catelogue_size,), 100.0, dtype=np.float32), - 'demand': np.full((self.constraints.product_catelogue_size,), 50.0, dtype=np.float32) + "elasticity": { + "price": init_prices, + "demand": np.zeros((self.constraints.product_catelogue_size,), 
dtype=np.float32), } } return self.state, {} - def step(self, action): - self.state['price'] = np.clip(self.state['price'] * (1 + action), - self.constraints.system_min_price, - self.constraints.system_max_price) + def step(self, action: np.ndarray): + self.t += 1 + base_prices = self.state["elasticity"]["price"].astype(np.float32) + new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)), + self.constraints.system_min_price, + self.constraints.system_max_price).astype(np.float32) + result = self.commerce_platform.run_pricing_simulation(new_prices) - result = self.commerce_platform.run_pricing_simulation(self.state['price']) - history = self.commerce_platform.simulation_history - self.state['demand'] = result['demand_estimates'] + if self.commerce_platform.use_defense: + demand_est = result["q_hat_defended"] + internal_err = result["internal_error_defended"] + else: + demand_est = result["q_hat_naive"] + internal_err = result["internal_error_naive"] + self.state["elasticity"]["price"] = new_prices + self.state["elasticity"]["demand"] = demand_est + volatility = 0.0 if self._prev_prices is None else \ + float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6)))) + self._prev_prices = new_prices.copy() - reward = sum( - self.state['price'] * self.state['demand'], - # performance historically, to take into account business kpi trends (using features from interaction data) - sum( - [-0.05 * i * history[-1]['internal_error'] for i in range(1, len(history))], - ) if len(history) > 1 else 0, - sum( - [0.1 * history[-1]['interaction_features']['mean_sale_price'] - 0.1 * history[i]['interaction_features']['mean_sale_price'] for i in range(len(history)-1)], - ) if len(history) > 1 else 0 - ) + revenue_observed = float(result["revenue_observed"]) + agent_loss = float(result["agent_loss"]) + err_mean = float(np.mean(internal_err)) + reward = (revenue_observed + - self.constraints.w_agent_loss * agent_loss + - self.constraints.w_volatility 
* volatility + - self.constraints.w_estimation_error * err_mean) + terminated = self.t >= self.constraints.episode_length + info = { + "t": self.t, + "revenue_observed": revenue_observed, + "revenue_oracle": float(result["revenue_oracle"]), + "agent_loss": agent_loss, + "ux_volatility": volatility, + "mean_internal_error": err_mean, + "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)), + "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)), + "true_human_purchases_total": float(np.sum(result["true_human_demand"])), + "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])), + } + return self.state, float(reward), terminated, False, info - # Check if episode is done - done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0 - - - return self.state, reward, done, False, {} - def simulate_demand(self, price): - # Simple linear demand model: demand decreases as price increases - base_demand = 200 - price_sensitivity = 0.5 - demand = max(0, base_demand - price_sensitivity * price) - return demand if __name__ == "__main__": - env = PHANTOMEnv() - obs, _ = env.reset() - done = False - total_reward = 0 + import matplotlib.pyplot as plt + from collections import defaultdict - while not done: - action = env.action_space.sample() # Random action - obs, reward, done, _, _ = env.step(action) - total_reward += reward - print(f"Price: {obs['price']:.2f}, Demand: {obs['demand']:.2f}, Reward: {reward:.2f}") - if done: - break + runs = {} + for use_defense in (False, True): + env = PHANTOMEnv(use_defense=use_defense) + obs, _ = env.reset(seed=42) + metrics = defaultdict(list) + total_reward = 0.0 + done = False - print(f"Total Reward: {total_reward:.2f}") + while not done: + action = env.action_space.sample() + obs, reward, done, _, info = env.step(action) + total_reward += reward + p_mean = float(np.mean(obs["elasticity"]["price"])) + q_mean = float(np.mean(obs["elasticity"]["demand"])) + p_std = 
float(np.std(obs["elasticity"]["price"])) + + metrics['t'].append(info['t']) + metrics['price_mean'].append(p_mean) + metrics['price_std'].append(p_std) + metrics['demand_mean'].append(q_mean) + metrics['revenue_observed'].append(info['revenue_observed']) + metrics['revenue_oracle'].append(info['revenue_oracle']) + metrics['agent_loss'].append(info['agent_loss']) + metrics['ux_volatility'].append(info['ux_volatility']) + metrics['look_to_book'].append(info['look_to_book']) + metrics['reward'].append(reward) + metrics['human_purchases'].append(info['true_human_purchases_total']) + metrics['agent_purchases'].append(info['true_agent_purchases_total']) + + if info['t'] % 20 == 0 or done: + print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} " + f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} " + f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} " + f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}") + + runs[use_defense] = metrics + print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n") + + fig, axes = plt.subplots(3, 3, figsize=(15, 12)) + fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold') + + plot_configs = [ + ('price_mean', 'Mean Price', 'Price'), + ('demand_mean', 'Mean Demand Estimate', 'Demand'), + ('revenue_observed', 'Revenue (Observed)', 'Revenue'), + ('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'), + ('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'), + ('look_to_book', 'Look-to-Book Ratio', 'Ratio'), + ('reward', 'Step Reward', 'Reward'), + ('human_purchases', 'Human Purchases', 'Count'), + ('agent_purchases', 'Agent Purchases', 'Count'), + ] + + for idx, (key, title, ylabel) in enumerate(plot_configs): + ax = axes[idx // 3, idx % 3] + for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]: + m = runs[use_defense] + 
ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5) + ax.set_xlabel('Step') + ax.set_ylabel(ylabel) + ax.set_title(title, fontsize=10, fontweight='bold') + ax.legend(loc='best', fontsize=8) + ax.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight') + print("Plot saved to phantom_env_comparison.png") + plt.show() From 3fa98f375df31eb23fe5a43116cf4c1535ce706d Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 17 Dec 2025 17:41:16 +0100 Subject: [PATCH 06/35] refactor to align moer with research in the env sims --- sim/rl/engine.py | 220 ++++++++++++++++++++++++++++++++++++ sim/rl/environment.py | 255 ++++++++++-------------------------------- sim/rl/train.py | 149 ++++++++++++++++++++++++ 3 files changed, 431 insertions(+), 193 deletions(-) create mode 100644 sim/rl/engine.py create mode 100644 sim/rl/train.py diff --git a/sim/rl/engine.py b/sim/rl/engine.py new file mode 100644 index 0000000..6d913f3 --- /dev/null +++ b/sim/rl/engine.py @@ -0,0 +1,220 @@ +import numpy as np +import pandas as pd +from abc import ABC, abstractmethod +from typing import Dict, Any +from environment import BusinessLogicConstraints + + +class BasePricingEngine(ABC): + """base interface for all pricing engines""" + def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): + self.c = constraints + self.rng = np.random.default_rng(seed) + self.step_count = 0 + + @abstractmethod + def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: + """compute new prices given current state and observation from environment + + args: + current_prices: current price vector [N] + observation: dict containing 'price', 'demand', and possibly interaction data + + returns: + new_prices: updated price vector [N] + """ + pass + + @abstractmethod + def update(obs, reward, done, info): + pass + + + + def reset(self): + """reset engine state for new episode""" + 
self.step_count = 0 + + +class WildPricingEngine(BasePricingEngine): + """production-like pricing using online elasticity estimation via EWMA regression""" + def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): + super().__init__(constraints, seed) + # per-product unit costs (unknown to customers; known to platform) + self.unit_cost = self.rng.uniform(8.0, 40.0, size=self.c.product_catelogue_size).astype(np.float32) + # online elasticity estimate (start moderately elastic) + self.e_hat = np.full((self.c.product_catelogue_size,), -1.3, dtype=np.float32) + # EWMA state for log-log regression + self.mu_logp = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + self.mu_logq = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + self.cov_pq = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + self.var_p = np.ones(self.c.product_catelogue_size, dtype=np.float32) + # knobs typical in production + self.lr = 0.08 + self.ewma = 0.05 + self.eps_explore = 0.03 + self.explore_scale = 0.03 + + def _safe_elasticity(self, e: np.ndarray) -> np.ndarray: + return np.clip(e, -5.0, -1.05) + + def reset(self): + super().reset() + self.e_hat = np.full((self.c.product_catelogue_size,), -1.3, dtype=np.float32) + self.mu_logp = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + self.mu_logq = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + self.cov_pq = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + self.var_p = np.ones(self.c.product_catelogue_size, dtype=np.float32) + + def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: + self.step_count += 1 + # extract demand signal (from env observation) as proxy for sales + demand = observation.get('demand', np.zeros(self.c.product_catelogue_size, dtype=np.float32)) + return self._update_from_demand(current_prices, demand) + + def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray: + # log transforms 
(add 1 to handle zeros) + logp = np.log(np.clip(prices, 1e-3, None)).astype(np.float32) + logq = np.log(sold + 1.0).astype(np.float32) + # EWMA moments for per-product regression: logq ≈ a + e*logp + a = self.ewma + dp = logp - self.mu_logp + dq = logq - self.mu_logq + self.mu_logp = (1 - a) * self.mu_logp + a * logp + self.mu_logq = (1 - a) * self.mu_logq + a * logq + self.cov_pq = (1 - a) * self.cov_pq + a * (dp * dq) + self.var_p = (1 - a) * self.var_p + a * (dp * dp + 1e-6) + e_new = self.cov_pq / (self.var_p + 1e-6) + self.e_hat = self._safe_elasticity(0.9 * self.e_hat + 0.1 * e_new) + # profit-optimal price for isoelastic demand (if e < -1) + e = self.e_hat + p_star = self.unit_cost * (e / (e + 1.0)) + # smooth toward p_star + new_prices = (1 - self.lr) * prices + self.lr * p_star + # exploration (small random perturbations) + if self.rng.random() < self.eps_explore: + noise = self.rng.normal(0.0, self.explore_scale, size=new_prices.shape).astype(np.float32) + new_prices = new_prices * (1.0 + noise) + # apply business guardrails (max change + bounds) + max_adj = self.c.max_price_adjustment + ratio = np.clip(new_prices / (prices + 1e-6), 1 - max_adj, 1 + max_adj) + new_prices = prices * ratio + new_prices = np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32) + return new_prices + + +class StaticPricingEngine(BasePricingEngine): + """baseline: fixed prices throughout episode""" + def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): + super().__init__(constraints, seed) + self.fixed_prices = None + + def reset(self): + super().reset() + self.fixed_prices = None + + def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: + self.step_count += 1 + if self.fixed_prices is None: + self.fixed_prices = current_prices.copy() + return self.fixed_prices.copy() + + +class SimpleDemandEngine(BasePricingEngine): + """demand-driven pricing: increase price when demand rises, 
decrease when it falls""" + def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): + super().__init__(constraints, seed) + self.prev_demand = None + self.lr = 0.05 + + def reset(self): + super().reset() + self.prev_demand = None + + def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: + self.step_count += 1 + demand = observation.get('demand', np.zeros(self.c.product_catelogue_size, dtype=np.float32)) + if self.prev_demand is None: + self.prev_demand = demand.copy() + return current_prices.copy() + # simple rule: if demand increases, raise price; if decreases, lower price + delta_d = demand - self.prev_demand + price_adj = self.lr * np.sign(delta_d) * np.abs(delta_d) / (np.abs(self.prev_demand) + 1.0) + new_prices = current_prices * (1.0 + price_adj) + self.prev_demand = demand.copy() + # apply constraints + max_adj = self.c.max_price_adjustment + ratio = np.clip(new_prices / (current_prices + 1e-6), 1 - max_adj, 1 + max_adj) + new_prices = current_prices * ratio + return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32) + + +class RandomWalkEngine(BasePricingEngine): + """random walk pricing with mean reversion""" + def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): + super().__init__(constraints, seed) + self.target_price = None + self.volatility = 0.02 + + def reset(self): + super().reset() + self.target_price = None + + def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: + self.step_count += 1 + if self.target_price is None: + self.target_price = current_prices.copy() + # random walk with mean reversion toward target + noise = self.rng.normal(0.0, self.volatility, size=current_prices.shape).astype(np.float32) + reversion = 0.01 * (self.target_price - current_prices) + new_prices = current_prices * (1.0 + noise) + reversion + # apply constraints + max_adj = self.c.max_price_adjustment + ratio = 
np.clip(new_prices / (current_prices + 1e-6), 1 - max_adj, 1 + max_adj) + new_prices = current_prices * ratio + return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32) + + +class ThompsonSamplingEngine(BasePricingEngine): + """bayesian bandit approach per product treating price as discrete action""" + def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): + super().__init__(constraints, seed) + self.n_price_levels = 5 + self.alpha = np.ones((self.c.product_catelogue_size, self.n_price_levels), dtype=np.float32) + self.beta = np.ones((self.c.product_catelogue_size, self.n_price_levels), dtype=np.float32) + self.price_grid = None + self.last_actions = None + + def reset(self): + super().reset() + self.alpha = np.ones((self.c.product_catelogue_size, self.n_price_levels), dtype=np.float32) + self.beta = np.ones((self.c.product_catelogue_size, self.n_price_levels), dtype=np.float32) + self.price_grid = None + self.last_actions = None + + def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: + self.step_count += 1 + if self.price_grid is None: + # define price grid per product + lo = current_prices * 0.7 + hi = current_prices * 1.3 + self.price_grid = np.linspace(lo, hi, self.n_price_levels).T + demand = observation.get('demand', np.zeros(self.c.product_catelogue_size, dtype=np.float32)) + # update beliefs based on last action + if self.last_actions is not None: + for i in range(self.c.product_catelogue_size): + a = self.last_actions[i] + reward = demand[i] + if reward > 0.5: + self.alpha[i, a] += reward + else: + self.beta[i, a] += 1.0 + # thompson sampling: sample from posterior, pick best + new_prices = np.zeros(self.c.product_catelogue_size, dtype=np.float32) + actions = np.zeros(self.c.product_catelogue_size, dtype=int) + for i in range(self.c.product_catelogue_size): + theta = self.rng.beta(self.alpha[i], self.beta[i]).astype(np.float32) + actions[i] = 
int(np.argmax(theta)) + new_prices[i] = self.price_grid[i, actions[i]] + self.last_actions = actions + return np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32) diff --git a/sim/rl/environment.py b/sim/rl/environment.py index 19f9ad4..fd725f8 100644 --- a/sim/rl/environment.py +++ b/sim/rl/environment.py @@ -1,5 +1,7 @@ +from sys import intern import gymnasium as gym from gymnasium import spaces +from matplotlib import interactive import numpy as np from dataclasses import dataclass import pandas as pd @@ -24,7 +26,7 @@ class BusinessLogicConstraints(): coi_sigmoid_temp: float = 1.25 base_human_demand: float = 0.08 base_agent_demand: float = 0.05 - human_price_elasticity: float = -1.2 + human_price_elasticity: float = -1.2 # assumptions here agent_price_elasticity: float = -0.6 w_agent_loss: float = 1.0 w_volatility: float = 5.0 @@ -35,31 +37,25 @@ class BusinessLogicConstraints(): def _sigmoid(x: np.ndarray) -> np.ndarray: return 1.0 / (1.0 + np.exp(-x)) - -def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series: - # baseline heuristic: high velocity + low conversion - v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index)) - cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index)) - total = session_df.get("total_interactions", pd.Series(0, index=session_df.index)) - return (total >= 12) & (v >= 0.20) & (cr <= 0.01) - - class CommercePlatform: - def __init__(self, product_catelogue_size: int, max_price: float, min_price: float, - constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None, - use_defense: bool = False): + """ + This is just an extension of the state management for the environment, it does not implement anything dynamic just helps us simulate demand. 
+ """ + def __init__(self, + product_catelogue_size: int, + max_price: float, + min_price: float, + constraints: BusinessLogicConstraints): self.product_catelogue_size = product_catelogue_size + self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catelogue_size,)) self.max_price = max_price self.min_price = min_price self.constraints = constraints - self.use_defense = use_defense - self.agent_detector = agent_detector self.simulation_history: List[Dict[str, Any]] = [] self._rng = np.random.default_rng(constraints.seed) - self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size) - self._popularity = self._popularity / (self._popularity.mean() + 1e-12) self._last_interaction_df: pd.DataFrame = pd.DataFrame() + def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]: # ground truth purchase propensities p = np.clip(prices, self.min_price, self.max_price) @@ -67,14 +63,19 @@ class CommercePlatform: human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity) agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity) return { - "human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95), - "agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95) + "human_purchase_prob": np.clip(human_prob, 0.0, 0.95), + "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95) } - def _session_markup_multiplier(self, signal_score: float) -> float: - # session-based COI markup based on demand signal expression - x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6) - return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0]) + def _load_behavioral_profile(actor : str, demand_forcing): + """ + This returns a markov chain with average weights which we get from interaction data of our experiments. 
+ This defines transition probabilities between different events: + search -> view_item_price_binN: 0.7 + view_item_price_binN -> add_to_cart: 0.2 + we also must reweight with the demand_forcing vector or purchase probabilities per-product + """ + def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame: demand = self.setup_true_demand(base_prices) @@ -84,94 +85,32 @@ class CommercePlatform: T = self.constraints.sessions_per_step n_agent_sessions = int(round(T * self.constraints.agent_share)) n_human_sessions = T - n_agent_sessions - - # human sessions: normal browse with possible purchase - for s in range(n_human_sessions): - session_id = f"h_{len(events)}_{s}" - k = int(self._rng.integers(1, 4)) - prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False) - t = 0.0 - inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k) - signal_score = 0.0 - purchased_any = False - - for i, pid in enumerate(prod_ids): - t += float(inter_times[i]) - price_shown = float(base_prices[pid]) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - signal_score += 1.0 - - if self._rng.random() < 0.35: - t += float(inter_times[i + k]) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid), - "action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - signal_score += 2.0 - - if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])): - t += float(inter_times[i + 2 * k]) - mult = self._session_markup_multiplier(signal_score) - price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price)) - events.append({ - "session_id": session_id, "actor": "human", "agent_id": None, 
"product_id": int(pid), - "action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1, - "price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score, - }) - purchased_any = True - - # agent sessions: split recon/purchase to circumvent COI n_agent_ids = max(1, n_agent_sessions // 2) - for a in range(n_agent_ids): - agent_id = f"a_{a}" - recon_session_id = f"{agent_id}_recon" - t = 0.0 - n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5 - inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1)) - prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views) - recon_signal = 0.0 + session_map = { + 'humans': n_human_sessions, + 'agents': n_agent_ids + } + pprob_map = { + 'humans': human_pprob, + 'agents': agent_pprob + } + joint_events = [] + for actor, n_sessions in session_map.items(): + bp = _load_behavioral_profile(actor, pprob_map[actor]) + counter = 0 + events = [] + while counter < n_sessions: + session_events = [] + while len(session_events) == 0 or session_events[-1]['action'] == 'checkout': + interaction_event = bp.sample(self._rng) + interaction_event['session_id'] = f'{actor}_{counter:06d}' + # TODO any other assignments + session_events.append(interaction_event) + events.extend(session_events) + counter += 1 + joint_events.extend(events) - for i, pid in enumerate(prod_ids): - t += float(inter_times[i]) - events.append({ - "session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid), - "action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - recon_signal += 1.0 - - # clean purchase session with minimal interactions - if self._rng.random() < self.constraints.agent_purchase_probability: - purchase_session_id = f"{agent_id}_clean" - pid = int(self._rng.integers(0, self.product_catelogue_size)) - t2 = 0.0 - 
clean_signal = 0.0 - t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) - events.append({ - "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, - "action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0, - "price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0, - }) - clean_signal += 1.0 - - if self._rng.random() < float(agent_pprob[pid]): - t2 += float(self._rng.gamma(shape=2.0, scale=0.7)) - obs_mult = self._session_markup_multiplier(clean_signal) - obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price)) - oracle_mult = self._session_markup_multiplier(recon_signal) # oracle links recon->purchase - oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price)) - events.append({ - "session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid, - "action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1, - "price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal, - }) - - return pd.DataFrame(events) + return pd.DataFrame(joint_events) def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]: if interaction_df.empty: @@ -183,6 +122,7 @@ class CommercePlatform: return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))} def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame: + # TODO: adapt this if df.empty: return pd.DataFrame() g = df.groupby("session_id", sort=False) @@ -208,73 +148,6 @@ class CommercePlatform: "is_agent": is_agent.astype(bool), }).reset_index() - def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray: - # proxy demand from weighted interaction events - if interaction_df.empty: - return np.zeros(self.product_catelogue_size, dtype=np.float32) - df = interaction_df - if exclude_sessions is 
not None: - bad_sessions = set(exclude_sessions.loc[exclude_sessions].index) - df = df[~df["session_id"].isin(bad_sessions)] - weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5} - w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float) - prod = df["product_id"].to_numpy(dtype=int) - q_hat = np.zeros(self.product_catelogue_size, dtype=float) - np.add.at(q_hat, prod, w) - return q_hat.astype(np.float32) - - def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]: - interaction_df = self._simulate_sessions(prices) - self._last_interaction_df = interaction_df - session_df = self._session_feature_table(interaction_df) - - predicted_agent_sessions = None - if (self.use_defense and self.agent_detector is not None and not session_df.empty): - predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id")) - - q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None) - q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \ - if predicted_agent_sessions is not None else q_hat_naive.copy() - - true_human = np.zeros(self.product_catelogue_size, dtype=float) - true_agent = np.zeros(self.product_catelogue_size, dtype=float) - if not interaction_df.empty: - purchases = interaction_df[interaction_df["action"] == "purchase"] - if not purchases.empty: - for _, r in purchases.iterrows(): - if r["actor"] == "human": - true_human[int(r["product_id"])] += 1.0 - else: - true_agent[int(r["product_id"])] += 1.0 - - revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0 - revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0 - agent_loss = max(0.0, revenue_oracle - revenue_observed) - - eps = 1e-6 - internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps) - internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps) - interaction_features = 
self.compute_interaction_features(interaction_df) - - summary = { - "prices": prices.copy(), - "interaction_df": interaction_df, - "session_df": session_df, - "q_hat_naive": q_hat_naive, - "q_hat_defended": q_hat_defended, - "true_human_demand": true_human.astype(np.float32), - "true_agent_purchases": true_agent.astype(np.float32), - "internal_error_naive": internal_error_naive.astype(np.float32), - "internal_error_defended": internal_error_def.astype(np.float32), - "interaction_features": interaction_features, - "revenue_observed": revenue_observed, - "revenue_oracle": revenue_oracle, - "agent_loss": agent_loss, - "predicted_agent_sessions": predicted_agent_sessions, - } - self.simulation_history.append(summary) - return summary - def get_interaction_data(self) -> np.ndarray: if self._last_interaction_df.empty: return np.array([], dtype=object) @@ -284,7 +157,7 @@ class CommercePlatform: class PHANTOMEnv(gym.Env): metadata = {"render_modes": []} - def __init__(self, use_defense: bool = False): + def __init__(self, constraints): super().__init__() self.constraints = BusinessLogicConstraints() self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment, @@ -301,14 +174,13 @@ class PHANTOMEnv(gym.Env): high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32), dtype=np.float32), }) + # TODO: define more features that we compute from the interaction data }) self.commerce_platform = CommercePlatform( product_catelogue_size=self.constraints.product_catelogue_size, max_price=self.constraints.system_max_price, min_price=self.constraints.system_min_price, - constraints=self.constraints, - agent_detector=simple_agent_detector, - use_defense=use_defense) + constraints=self.constraints) self._rng = np.random.default_rng(self.constraints.seed) self.t = 0 self._prev_prices: Optional[np.ndarray] = None @@ -336,17 +208,13 @@ class PHANTOMEnv(gym.Env): new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)), 
self.constraints.system_min_price, self.constraints.system_max_price).astype(np.float32) - result = self.commerce_platform.run_pricing_simulation(new_prices) - - if self.commerce_platform.use_defense: - demand_est = result["q_hat_defended"] - internal_err = result["internal_error_defended"] - else: - demand_est = result["q_hat_naive"] - internal_err = result["internal_error_naive"] self.state["elasticity"]["price"] = new_prices - self.state["elasticity"]["demand"] = demand_est + # TODO: use the commerce platform to simulate sessions + interactions_df = self.commerce_platform._simulate_sessions(new_prices) + result = self.commerce_platform.compute_interaction_features(interactions_df) + # TODO: implement COI computation to use in reward + COI = 0.0 volatility = 0.0 if self._prev_prices is None else \ float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6)))) @@ -354,12 +222,13 @@ class PHANTOMEnv(gym.Env): revenue_observed = float(result["revenue_observed"]) agent_loss = float(result["agent_loss"]) - err_mean = float(np.mean(internal_err)) reward = (revenue_observed - - self.constraints.w_agent_loss * agent_loss - - self.constraints.w_volatility * volatility - - self.constraints.w_estimation_error * err_mean) + - COI + - self.constraints.w_agent_loss * agent_loss + - self.constraints.w_volatility * volatility + - self.constraints.w_estimation_error + ) terminated = self.t >= self.constraints.episode_length info = { diff --git a/sim/rl/train.py b/sim/rl/train.py new file mode 100644 index 0000000..41a87ab --- /dev/null +++ b/sim/rl/train.py @@ -0,0 +1,149 @@ +import numpy as np +import logging +from pathlib import Path +from typing import Dict, Type, Optional +import pickle +from torch import neg_ +from torch.utils.tensorboard import SummaryWriter +from environment import PHANTOMEnv, FastTrainingConstraints, BusinessLogicConstraints +from engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine, + SimpleDemandEngine, 
RandomWalkEngine, ThompsonSamplingEngine) + +logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') +logger = logging.getLogger(__name__) + + + +""" +Target training loop: +have base prices p0 from env reset and run the env step, collect reward and metrics +pass this to the pricing engine which computes the price action to take based on previous reward by learning +the new action gets passed to the step +so we alternate, step -> reward -> engine (produces price delta) -> step with price delta -> reward +to make sure the reinforcement learning inside the engine can learn we need to have trajectory of prices +CURRENT SOLUTION BELOW does not implement correct learning or updates. +""" + +class EngineTrainer: + """wrapper to run pricing engines through episodes and collect metrics""" + def __init__(self, engine: BasePricingEngine, env: PHANTOMEnv, + tb_writer: Optional[SummaryWriter] = None): + self.engine = engine + self.env = env + self.episode_metrics = [] + self.tb_writer = tb_writer + self.global_step = 0 + + def train(self, n_episodes: int, seed: int = 42): + + obs, _ = self.env.reset(seed=seed) + prices = None + for ep in range(n_episodes): + prices = self.engine.compute_prices(prices, obs + obs, reward, done, _, info = self.env.step(prices) + self.engine.update(obs, reward, done, info) + return self + + + + + + + return self.episode_metrics + + def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict: + """evaluate trained engine""" + results = {k: [] for k in ['total_reward', 'revenue_observed', 'revenue_oracle', + 'agent_loss', 'ux_volatility', 'look_to_book']} + for ep in range(n_episodes): + metrics = self.run_episode(seed=seed + ep) + for k in results: results[k].append(metrics[k]) + return {k: (np.mean(v), np.std(v)) for k, v in results.items()} + + +def make_env(fast: bool = True): + constraints = FastTrainingConstraints() if fast else BusinessLogicConstraints() + return PHANTOMEnv(constraints=constraints) + + 
+def train_engine(engine_cls: Type[BasePricingEngine], env: PHANTOMEnv, + n_episodes: int, seed: int = 42, + tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer: + constraints = env.constraints + engine = engine_cls(constraints=constraints, seed=seed) + trainer = EngineTrainer(engine, env, tb_writer=tb_writer) + trainer.train(n_episodes, seed=seed) + return trainer + + +def save_trainer(trainer: EngineTrainer, path: Path): + """save engine state and metrics""" + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, 'wb') as f: + pickle.dump({ + 'engine': trainer.engine, + 'metrics': trainer.episode_metrics + }, f) + logger.info(f"Saved trainer to {path}") + + +def load_trainer(path: Path, env: PHANTOMEnv, + tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer: + """load saved engine""" + with open(path, 'rb') as f: + data = pickle.load(f) + trainer = EngineTrainer(data['engine'], env, tb_writer=tb_writer) + trainer.episode_metrics = data['metrics'] + return trainer + + +if __name__ == "__main__": + base_dir = Path("./runs") + base_dir.mkdir(exist_ok=True) + + engines = { + "Wild": WildPricingEngine, + "Static": StaticPricingEngine, +# "SimpleDemand": SimpleDemandEngine, + "RandomWalk": RandomWalkEngine, + "ThompsonSampling": ThompsonSamplingEngine, + } + defenses = [False, True] + n_train_episodes = 50 + n_eval_episodes = 10 + seed = 42 + fast_mode = True + + logger.info(f"Training config: {n_train_episodes} episodes per engine, fast_mode={fast_mode}") + + trained_trainers = {} + + for engine_name, engine_cls in engines.items(): + for use_defense in defenses: + defense_label = "defense_on" if use_defense else "defense_off" + run_name = f"{engine_name}_{defense_label}" + log_dir = base_dir / run_name + log_dir.mkdir(parents=True, exist_ok=True) + + logger.info(f"Training {engine_name} with defense={use_defense}") + logger.info(f"Log directory: {log_dir}") + + env = make_env(fast=fast_mode) + tb_writer = SummaryWriter(log_dir=str(log_dir)) + 
trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer) + tb_writer.close() + + save_path = log_dir / "trainer.pkl" + save_trainer(trainer, save_path) + + trained_trainers[run_name] = (trainer, env) + + logger.info("Starting evaluation") + + for run_name, (trainer, env) in trained_trainers.items(): + logger.info(f"Evaluating {run_name}") + results = trainer.evaluate(n_episodes=n_eval_episodes, seed=seed + 1000) + for metric, (mean, std) in results.items(): + logger.info(f" {metric:20s}: {mean:10.2f} ± {std:6.2f}") + + logger.info(f"Results saved to: {base_dir}") From 6a06a8af4a01a898a1897cbc9b2560044e201ee1 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 17 Dec 2025 18:50:04 +0100 Subject: [PATCH 07/35] simple code cleanup --- sim/rl/engine.py | 7 +++++++ sim/rl/train.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sim/rl/engine.py b/sim/rl/engine.py index 6d913f3..e0caca8 100644 --- a/sim/rl/engine.py +++ b/sim/rl/engine.py @@ -1,3 +1,4 @@ +from os import kill import numpy as np import pandas as pd from abc import ABC, abstractmethod @@ -5,6 +6,11 @@ from typing import Dict, Any from environment import BusinessLogicConstraints +""" +An angine by default should have its own demand estimation mechanism from the observed observations whihc are the computer feature. +From these features we then follow the researc hstructure of q -> p with a testable and must be updatable mechanism. 
+""" + class BasePricingEngine(ABC): """base interface for all pricing engines""" def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0): @@ -12,6 +18,7 @@ class BasePricingEngine(ABC): self.rng = np.random.default_rng(seed) self.step_count = 0 + @abstractmethod def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray: """compute new prices given current state and observation from environment diff --git a/sim/rl/train.py b/sim/rl/train.py index 41a87ab..ba257de 100644 --- a/sim/rl/train.py +++ b/sim/rl/train.py @@ -39,7 +39,7 @@ class EngineTrainer: obs, _ = self.env.reset(seed=seed) prices = None for ep in range(n_episodes): - prices = self.engine.compute_prices(prices, obs + prices = self.engine.compute_prices(prices, obs) obs, reward, done, _, info = self.env.step(prices) self.engine.update(obs, reward, done, info) return self From ec4cf074e65478c32bea71a2f39e4eeda714a15f Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Fri, 9 Jan 2026 20:20:31 +0100 Subject: [PATCH 08/35] feature: MDP behavior mappers (unlinked) --- sim/rl/behavior_loader/loader.py | 63 ++++++++++++++ sim/rl/behavior_loader/models.py | 137 +++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 sim/rl/behavior_loader/loader.py create mode 100644 sim/rl/behavior_loader/models.py diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py new file mode 100644 index 0000000..99a1541 --- /dev/null +++ b/sim/rl/behavior_loader/loader.py @@ -0,0 +1,63 @@ +import os +from pydantic import BaseModel as Base +import json + +class PayloadModel(Base): + sessionId: str + experimentId: str | None + eventName: str + page: str | None + productId: str | None + metadata: dict + storeMode: str + userAgent: str + ts: str + +class ValueModel(Base): + payload: PayloadModel + encoding: str + isPayloadNull: bool + schemaId: int + size: int + +class InteractionModel(Base): + partitionID: int + offset: int + 
timestamp: int + compression: str + isTransactional: bool + headers: list + key: dict + value: ValueModel + +class Loader: + def __init__(self, src_dir: str): + self.src_dir = src_dir + self.entries = os.listdir(src_dir) + if not self.entries: raise ValueError("empty directory") + self.data = self._load_sessions() + + def _is_admin_page(self, interaction: InteractionModel) -> bool: + page = interaction.value.payload.page + return page and page.startswith("/admin/") + + def _load_sessions(self) -> dict: + sessions = {} + for entry in self.entries: + int_path = f"{self.src_dir}/{entry}/int.json" + raw = json.load(open(int_path)) + ints = [InteractionModel(**i) for i in raw] + sessions[entry] = [i for i in ints if not self._is_admin_page(i)] + return sessions + + def get_data(self) -> dict: + return self.data + + def get_entries(self) -> tuple[list[str], int]: + return self.entries, len(self.entries) + +if __name__ == "__main__": + DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + loader = Loader(DIR) + _, n = loader.get_entries() + print(f"Loaded {n} sessions from {DIR}") diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py new file mode 100644 index 0000000..f8e92b7 --- /dev/null +++ b/sim/rl/behavior_loader/models.py @@ -0,0 +1,137 @@ +from loader import Loader +from collections import defaultdict +from typing import Dict, List, Tuple, Set +import numpy as np +import graphviz + +DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + +class BehaviorModel: + def __init__(self, src_dir: str = DIR): + self.loader = Loader(src_dir) + self.data = self.loader.get_data() + self.entries, self.num_entries = self.loader.get_entries() + self.mdp = None + + def _state_repr(self, evt) -> str: + p = evt.value.payload + return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}" + + def _extract_sessions(self): + # transform raw events into sequential state trajectories per session + 
trajectories = [] + for sid, evts in self.data.items(): + if len(evts) < 2: continue + states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.timestamp)] + trajectories.append(states) + return trajectories + + def _calc_transitions(self, trajectories: List[List[str]]) -> Tuple[Dict, Set]: + trans = defaultdict(lambda: defaultdict(int)) + states = set() + for traj in trajectories: + for i in range(len(traj) - 1): + s, s_next = traj[i], traj[i+1] + trans[s][s_next] += 1 + states.update([s, s_next]) + return trans, states + + def _calc_rewards(self, trajectories: List[List[str]]) -> Dict: + # reward based on session progression depth + rwd = defaultdict(list) + for traj in trajectories: + n = len(traj) + for i, s in enumerate(traj): + rwd[s].append(i / n) + return rwd + + def _normalize_trans(self, counts: Dict) -> Dict: + return {s: {s_n: cnt/sum(nxt.values()) for s_n, cnt in nxt.items()} + for s, nxt in counts.items()} + + def build_MDP(self) -> Dict: + trajs = self._extract_sessions() + trans_cnt, states = self._calc_transitions(trajs) + trans_prob = self._normalize_trans(trans_cnt) + state_rwd = self._calc_rewards(trajs) + state_val = {s: np.mean(r) for s, r in state_rwd.items()} + + self.mdp = { + 'states': sorted(list(states)), + 'num_states': len(states), + 'transitions': trans_prob, + 'state_values': state_val, + 'state_rewards': state_rwd, + 'trans_counts': trans_cnt, + } + return self.mdp + + def transition_prob(self, s: str, s_next: str) -> float: + if not self.mdp: raise ValueError("build MDP first") + return self.mdp['transitions'].get(s, {}).get(s_next, 0.0) + + def state_value(self, s: str) -> float: + if not self.mdp: raise ValueError("build MDP first") + return self.mdp['state_values'].get(s, 0.0) + + def sample_traj(self, start: str, max_len: int = 50) -> List[str]: + if not self.mdp: raise ValueError("build MDP first") + path = [start] + curr = start + for _ in range(max_len): + nxt = self.mdp['transitions'].get(curr, {}) + if not nxt: 
break + curr = np.random.choice(list(nxt.keys()), p=list(nxt.values())) + path.append(curr) + return path + +def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False): + """visualize MDP as directed graph using graphviz, aggregated by event type""" + if not model.mdp: raise ValueError("build MDP first") + + # aggregate transitions by event type + evt_trans = defaultdict(lambda: defaultdict(float)) + for s, trans in model.mdp['transitions'].items(): + evt_src = s.split('|')[2] + for s_next, prob in trans.items(): + evt_dst = s_next.split('|')[2] + evt_trans[evt_src][evt_dst] += prob + + # normalize aggregated transitions + for evt_src in evt_trans: + total = sum(evt_trans[evt_src].values()) + if total > 0: + for evt_dst in evt_trans[evt_src]: + evt_trans[evt_src][evt_dst] /= total + + g = graphviz.Digraph(format=fmt) + g.attr(rankdir='LR', size='30') + g.attr('node', shape='circle', width='1', height='1') + + # collect all event types + events = set(evt_trans.keys()) + for trans in evt_trans.values(): + events.update(trans.keys()) + + # add nodes for each event type + for evt in events: + g.node(evt) + + # add edges above threshold + for evt_src in evt_trans: + for evt_dst, prob in evt_trans[evt_src].items(): + if prob > threshold: + g.edge(evt_src, evt_dst, label=f'{prob:.2f}') + + g.render(output, view=view, cleanup=True) + print(f"Saved MDP graph to {output}.{fmt}") + return g + +if __name__ == "__main__": + model = BehaviorModel(DIR) + mdp = model.build_MDP() + print(f"Built MDP: {mdp['num_states']} states, {sum(len(t) for t in mdp['transitions'].values())} transitions") + if not mdp['states']: + print("No states found") + exit(1) + visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg") From 131323ef56984229063ce1efca763615f51cb5d0 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sat, 10 Jan 2026 10:33:56 +0100 Subject: [PATCH 09/35] featuer: dot exporter --- 
sim/rl/behavior_loader/models.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index f8e92b7..6e4201e 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -85,7 +85,7 @@ class BehaviorModel: path.append(curr) return path -def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False): +def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False): """visualize MDP as directed graph using graphviz, aggregated by event type""" if not model.mdp: raise ValueError("build MDP first") @@ -125,6 +125,13 @@ def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = " g.render(output, view=view, cleanup=True) print(f"Saved MDP graph to {output}.{fmt}") + + if export_dot: + dot_file = f"{output}.dot" + with open(dot_file, 'w') as f: + f.write(g.source) + print(f"Exported DOT source to {dot_file}") + return g if __name__ == "__main__": @@ -134,4 +141,4 @@ if __name__ == "__main__": if not mdp['states']: print("No states found") exit(1) - visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg") + visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="svg", export_dot=True) From f9bf3de71eb691c9182c46f091ffe25b80d402de Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sat, 10 Jan 2026 11:48:03 +0100 Subject: [PATCH 10/35] pdf rendering --- sim/rl/behavior_loader/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index 6e4201e..bce2429 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -141,4 +141,4 @@ if __name__ == "__main__": if not mdp['states']: print("No states found") exit(1) - visualize_mdp(model, threshold=0.05, 
output="mdp_viz", fmt="svg", export_dot=True) + visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="pdf", export_dot=True) From 8b429b7a8e5a7c1e671c62afa34b013358be4208 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Mon, 12 Jan 2026 10:09:55 +0100 Subject: [PATCH 11/35] chore: refactor to better map end to end --- backend/provider/app.py | 71 ++++++++++++------------- experiments/procesing/pricers/simple.py | 59 +++++++++++++++++--- experiments/procesing/steps/session.py | 1 + lib/model_registry.py | 46 ++++++++++++++++ web/src/app/api/pricing/route.ts | 37 +++++++------ 5 files changed, 153 insertions(+), 61 deletions(-) diff --git a/backend/provider/app.py b/backend/provider/app.py index fb72a9d..6f9a55d 100644 --- a/backend/provider/app.py +++ b/backend/provider/app.py @@ -47,53 +47,52 @@ def health() -> dict: @app.get("/api/{mode}/price/{productId}", response_model=PriceResponse) def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)): + """ + THIS is the fast lookup service (mechanism). 
+ Priority: session-keyed price > global optimal price > base price + """ product = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data[0] if not product: raise HTTPException(404, f"Product {productId} not found") metadata = product['metadata'] base_price = metadata.get('base_price', 100.0) - # fetch pre-computed prices from registry + # PRIORITY 1: session-aware price (computed by Airflow worker) + if sessionId: + session_price = registry.get_session_price(sessionId, productId) + if session_price is not None: + return PriceResponse( + productId=productId, + price=session_price, + base_price=base_price, + markup=session_price/base_price, + elasticity=None, + model_version='session-aware' + ) + + # PRIORITY 2: global pre-computed prices (surge pricing) prices_df = registry.get_prices('latest') - elasticity_df = registry.get_elasticity('latest') - - if prices_df is None: - # fallback: no pre-computed prices available - return PriceResponse( - productId=productId, - price=base_price, - base_price=base_price, - markup=1.0, - elasticity=None - ) - - # lookup pre-computed price for this product - product_price_row = prices_df[prices_df['productId'] == productId] - if product_price_row.empty: - # product not in pre-computed prices, fallback to base - return PriceResponse( - productId=productId, - price=base_price, - base_price=base_price, - markup=1.0, - elasticity=None - ) - - optimal_price = float(product_price_row['optimal_price'].iloc[0]) # TODO: use optimal_price everywhere as aresult - - # get elasticity if available - product_elasticity = None - if elasticity_df is not None: - product_elasticity_row = elasticity_df[elasticity_df['productId'] == productId] - if not product_elasticity_row.empty: - product_elasticity = float(product_elasticity_row['elasticity'].iloc[0]) + if prices_df is not None: + product_price_row = prices_df[prices_df['productId'] == productId] + if not product_price_row.empty: + optimal_price = 
float(product_price_row['optimal_price'].iloc[0]) + return PriceResponse( + productId=productId, + price=optimal_price, + base_price=base_price, + markup=optimal_price/base_price, + elasticity=None, + model_version='surge' + ) + # PRIORITY 3: fallback to base price return PriceResponse( productId=productId, - price=optimal_price, + price=base_price, base_price=base_price, - markup=optimal_price/base_price, - elasticity=product_elasticity + markup=1.0, + elasticity=None, + model_version='base' ) @app.get("/models") diff --git a/experiments/procesing/pricers/simple.py b/experiments/procesing/pricers/simple.py index 39be37a..6bdd1ca 100644 --- a/experiments/procesing/pricers/simple.py +++ b/experiments/procesing/pricers/simple.py @@ -3,6 +3,46 @@ import pandas as pd from procesing.pricers.base import PricingFunction +def session_features_to_demand(session_features: pd.DataFrame) -> float: + """ + Map session behavioral features to demand proxy. + THIS is the critical θ̂ → D transformation for rule-based pricing. 
+ + Logic: + - High velocity → agent behavior → price up (revenue recovery) + - High cart ratio → purchase intent → price up + - Low activity → discount to convert + + Returns: demand proxy score (0-20 range, higher = more demand) + """ + if session_features.empty: + return 1.0 + + feat = session_features.iloc[0] if len(session_features) > 0 else {} + + velocity = feat.get('interaction_velocity', 0) + cart_ratio = feat.get('cart_to_view_ratio', 0) + item_views = feat.get('item_views', 0) + cart_adds = feat.get('cart_adds', 0) + + # baseline demand + demand = 1.0 + + # agent detection: high velocity → treat as high "demand" to price up + if velocity > 2.0: + demand += 10.0 # strong agent signal + + # conversion intent: cart interaction → price up + if cart_ratio > 0.1 or cart_adds > 0: + demand += 5.0 + + # browsing depth: many views → interest signal + if item_views > 3: + demand += min(item_views, 5.0) + + return min(demand, 20.0) # cap at 20 + + class StaticPricer(PricingFunction): """Static pricing: always return fixed base prices""" @@ -67,21 +107,24 @@ class SimpleSurgePricer(PricingFunction): self.surge_multiplier = surge_multiplier self.discount_multiplier = discount_multiplier - def fit(self, market_data : pd.DataFrame): + def fit(self, market_data: pd.DataFrame): """Extract base prices from product catalog or historical averages""" self.base_prices = market_data['base_price'].to_numpy() if 'base_price' in market_data.columns else market_data['price'].values - self.demand_history = market_data['demand'].to_numpy() if 'demand' in market_data.columns else np.zeros_like(self.base_prices) + return self - def predict(self) -> np.ndarray: + def predict(self, state_space) -> np.ndarray: """ Adjust prices based on current demand using surge rules. 
- state_space.demand: demand counts per product - state_space.prices: current prices (fallback if base_prices not set) + state_space.demand: demand proxy per product (from session features) + state_space.prices: base prices """ - current_prices = self.base_prices if self.base_prices is not None else np.ones_like(demand_vector) * 99.99 - demand = self.demand_history if self.demand_history is not None else np.zeros_like(current_prices) - new_prices = current_prices.copy() + demand = np.asarray(state_space.demand) if state_space and hasattr(state_space, 'demand') else np.array([0]) + base = np.asarray(state_space.prices) if state_space and hasattr(state_space, 'prices') else self.base_prices + if base is None: + base = np.ones(len(demand)) * 99.99 + + new_prices = base.copy() high_mask = demand >= self.high_threshold new_prices[high_mask] *= self.surge_multiplier diff --git a/experiments/procesing/steps/session.py b/experiments/procesing/steps/session.py index 4b950aa..ec6f27c 100644 --- a/experiments/procesing/steps/session.py +++ b/experiments/procesing/steps/session.py @@ -135,6 +135,7 @@ class ExtractSessionFeaturesStep(BaseContextStep): Vectorized session feature extraction - replaces O(n^2) per-row loop. Input: interactions_df Output: session-level feature matrix + THIS is our main mapping from tau (trajectory) to some features vector theta - we need to do this very well. This is what will go into demand esimation. """ def transform(self, X: pd.DataFrame) -> pd.DataFrame: diff --git a/lib/model_registry.py b/lib/model_registry.py index 92d7934..e833a1a 100755 --- a/lib/model_registry.py +++ b/lib/model_registry.py @@ -178,3 +178,49 @@ class ModelRegistry: return True except: return False + + def set_session_prices(self, session_id: str, prices: Dict[str, float], ttl: int = 1800): + """ + Store prices for a specific session. + THIS is the write path for session-aware pricing. 
+ + Args: + session_id: session identifier + prices: dict of {productId: price} + ttl: time-to-live in seconds (default 30min) + """ + if not prices: + return + + key = f"session:{session_id}:prices" + # use Redis hash for O(1) lookup per product + self.redis_client.hset(key, mapping={k: str(v) for k, v in prices.items()}) + self.redis_client.expire(key, ttl) + + def get_session_price(self, session_id: str, product_id: str) -> Optional[float]: + """ + Lookup price for (sessionId, productId). + THIS is the read path for fast provider lookup. + + Returns: price or None if not found + """ + key = f"session:{session_id}:prices" + price_str = self.redis_client.hget(key, product_id) + + if price_str is None: + return None + + return float(price_str.decode('utf-8') if isinstance(price_str, bytes) else price_str) + + def get_session_all_prices(self, session_id: str) -> Dict[str, float]: + """Get all prices for a session.""" + key = f"session:{session_id}:prices" + prices_raw = self.redis_client.hgetall(key) + + if not prices_raw: + return {} + + return { + (k.decode('utf-8') if isinstance(k, bytes) else k): float(v.decode('utf-8') if isinstance(v, bytes) else v) + for k, v in prices_raw.items() + } diff --git a/web/src/app/api/pricing/route.ts b/web/src/app/api/pricing/route.ts index 1aec75b..6532131 100644 --- a/web/src/app/api/pricing/route.ts +++ b/web/src/app/api/pricing/route.ts @@ -30,6 +30,8 @@ export async function GET(req: NextRequest) { const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001'; try { const queryParams = new URLSearchParams(); + // THIS is our entry point into the dynamic pricing where we reference the context of the sesion and experiment and ask for a price to assign to the trajectory which is expressed + // The whole pipeline gets triggered from here. 
if (sessionId) queryParams.append('sessionId', sessionId); if (experimentId) queryParams.append('experimentId', experimentId); @@ -55,25 +57,26 @@ export async function GET(req: NextRequest) { price = Math.round(randomBase * 100) / 100; } - // log price to kafka for elasticity computation + // log price to kafka asynchronously (non-blocking) if (sessionId) { const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000'; - try { - await fetch(`${backendUrl}/api/kafka/price-log`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - productId, - price, - sessionId, - experimentId: experimentId || undefined, - storeMode, - ts: timestamp, - }), - }); - } catch (err) { - console.error('[price-log-error]', err); - } + // fire and forget - don't await to avoid blocking response + fetch(`${backendUrl}/api/kafka/price-log`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + productId, + price, + sessionId, + experimentId: experimentId || undefined, + storeMode, + ts: timestamp, + }), + }).catch(err => { + if (process.env.NODE_ENV === 'development') { + console.error('[price-log-error]', err); + } + }); } if (process.env.NODE_ENV === 'development') { From 62a4008c29f94b753089de69ba84bfb6eea8a0cc Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Mon, 12 Jan 2026 13:37:48 +0100 Subject: [PATCH 12/35] feat: integration of pipeline hooks into testing --- Makefile | 2 ++ backend/server/app.py | 6 ++++- docker-compose.yml | 2 ++ .../airflow/dags/surge_pricing_pipeline.py | 24 +++++++++++++++---- experiments/procesing/pricers/simple.py | 3 ++- tests/e2e/helpers/kafka.ts | 4 ++-- tests/e2e/playwright.config.ts | 4 ++-- tests/e2e/scenarios/session-aware.spec.ts | 21 ++++++++++------ tests/e2e/scenarios/surge-pricing.spec.ts | 11 +++++++-- 9 files changed, 58 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 0c51bb3..879afb5 100644 --- a/Makefile +++ b/Makefile @@ 
-49,8 +49,10 @@ test.backend: $(VENV) test.e2e: @cd tests/e2e && npm install @cd tests/e2e && npx playwright install chromium + @test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env @timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1) @timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1) + @timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1) @cd tests/e2e && npm test .PHONY: test.all diff --git a/backend/server/app.py b/backend/server/app.py index d338408..f100811 100644 --- a/backend/server/app.py +++ b/backend/server/app.py @@ -198,12 +198,16 @@ def dump_logs( auto_offset_reset='earliest', enable_auto_commit=False, value_deserializer=lambda x: json.loads(x.decode('utf-8')), - consumer_timeout_ms=5000 + consumer_timeout_ms=30000, + fetch_max_wait_ms=10000, + max_poll_records=1000 ) events = [] for msg in consumer: events.append(msg.value) + if last_n and len(events) >= last_n * 2: + break consumer.close() diff --git a/docker-compose.yml b/docker-compose.yml index f72f415..561c393 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -144,6 +144,7 @@ services: - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} + - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth - KAFKA_HOST=kafka - KAFKA_PORT=29092 - BACKEND_URL=http://backend:5000 @@ -180,6 +181,7 @@ services: - AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} + - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth - KAFKA_HOST=kafka - KAFKA_PORT=29092 - BACKEND_URL=http://backend:5000 diff --git 
a/experiments/airflow/dags/surge_pricing_pipeline.py b/experiments/airflow/dags/surge_pricing_pipeline.py index b1d7c61..1a3b3d0 100644 --- a/experiments/airflow/dags/surge_pricing_pipeline.py +++ b/experiments/airflow/dags/surge_pricing_pipeline.py @@ -120,15 +120,31 @@ def apply_surge_pricing(**kwargs): # rename demand_score to demand for pricer compatibility data = product_features.rename(columns={'demand_score': 'demand'}) + high_thresh = dag_conf.get('high_threshold', 10) + low_thresh = dag_conf.get('low_threshold', 2) + surge_mult = dag_conf.get('surge_multiplier', 1.2) + discount_mult = dag_conf.get('discount_multiplier', 0.9) + + logging.info(f"Surge pricing config: high_thresh={high_thresh}, low_thresh={low_thresh}, surge_mult={surge_mult}, discount_mult={discount_mult}") + logging.info(f"Demand stats: min={data['demand'].min():.2f}, max={data['demand'].max():.2f}, mean={data['demand'].mean():.2f}") + logging.info(f"Products with high demand (>={high_thresh}): {(data['demand'] >= high_thresh).sum()}") + logging.info(f"Products with low demand (<={low_thresh}): {(data['demand'] <= low_thresh).sum()}") + surge_pricer = SimpleSurgePricer( - high_threshold=dag_conf.get('high_threshold', 10), - low_threshold=dag_conf.get('low_threshold', 2), - surge_multiplier=dag_conf.get('surge_multiplier', 1.2), - discount_multiplier=dag_conf.get('discount_multiplier', 0.9) + high_threshold=high_thresh, + low_threshold=low_thresh, + surge_multiplier=surge_mult, + discount_multiplier=discount_mult ) surge_pricer.fit(data) data['optimal_price'] = surge_pricer.predict() + base_avg = data['base_price'].mean() + optimal_avg = data['optimal_price'].mean() + price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100 + + logging.info(f"Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%") + prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={ 'price': 'current_price', 'demand': 
'demand_score' diff --git a/experiments/procesing/pricers/simple.py b/experiments/procesing/pricers/simple.py index 6bdd1ca..1a03f9f 100644 --- a/experiments/procesing/pricers/simple.py +++ b/experiments/procesing/pricers/simple.py @@ -124,7 +124,8 @@ class SimpleSurgePricer(PricingFunction): if base is None: base = np.ones(len(demand)) * 99.99 - new_prices = base.copy() + # ensure float dtype to allow multiplication by float multipliers + new_prices = base.astype(np.float64).copy() high_mask = demand >= self.high_threshold new_prices[high_mask] *= self.surge_multiplier diff --git a/tests/e2e/helpers/kafka.ts b/tests/e2e/helpers/kafka.ts index c0a95dd..18b977d 100644 --- a/tests/e2e/helpers/kafka.ts +++ b/tests/e2e/helpers/kafka.ts @@ -9,8 +9,8 @@ interface InteractionEvent { const dumpKafkaTopic = async (backendUrl: string, topic: string) => { const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`); if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`); - const { messages = [] } = await resp.json(); - return messages as any[]; + const { data = [] } = await resp.json(); + return data as any[]; }; export const waitForInteractionEvent = async ( diff --git a/tests/e2e/playwright.config.ts b/tests/e2e/playwright.config.ts index 54a5561..dc3c815 100644 --- a/tests/e2e/playwright.config.ts +++ b/tests/e2e/playwright.config.ts @@ -5,14 +5,14 @@ export default defineConfig({ fullyParallel: true, forbidOnly: !!process.env.CI, retries: 0, - workers: 5, + workers: 1, reporter: 'list', use: { baseURL: process.env.WEB_URL || 'http://localhost:3000', trace: 'retain-on-failure', screenshot: 'only-on-failure', }, - timeout: 60000, + timeout: 180000, expect: { timeout: 10000, }, diff --git a/tests/e2e/scenarios/session-aware.spec.ts b/tests/e2e/scenarios/session-aware.spec.ts index b204984..5c27747 100644 --- a/tests/e2e/scenarios/session-aware.spec.ts +++ b/tests/e2e/scenarios/session-aware.spec.ts @@ -9,6 +9,7 @@ import { addToCart, } from 
'../helpers/interactions'; import { getSessionEvents } from '../helpers/kafka'; +import { runSessionPricing } from '../helpers/airflow'; test.describe('SessionAwarePricer E2E', () => { const STORE_TYPE = 'hotel'; @@ -23,6 +24,9 @@ test.describe('SessionAwarePricer E2E', () => { await page.waitForTimeout(1500); const productId2 = await humanLikeViewProduct(page, STORE_TYPE); + + await runSessionPricing(STORE_TYPE); + const secondPrice = await getPriceFromDOM(page); expect(await verifySessionConsistency(page, sessionId)).toBeTruthy(); @@ -40,11 +44,13 @@ test.describe('SessionAwarePricer E2E', () => { await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE); expect(await verifySessionConsistency(page, sessionId)).toBeTruthy(); - await page.waitForTimeout(2500); + await page.waitForTimeout(1000); const events = await getSessionEvents(backendUrl, sessionId); expect(events.length).toBeGreaterThanOrEqual(8); + await runSessionPricing(STORE_TYPE); + await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); const agentPrice = await getPriceFromDOM(page); @@ -59,14 +65,12 @@ test.describe('SessionAwarePricer E2E', () => { const productId = await viewProductViaFlow(page, STORE_TYPE); const baselinePrice = await getPriceFromDOM(page); - const startTime = Date.now(); await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE); - const duration = (Date.now() - startTime) / 1000; - const eventsPerSec = 10 / duration; - expect(eventsPerSec).toBeGreaterThan(2.0); + const events = await getSessionEvents(backendUrl, sessionId); + expect(events.length).toBeGreaterThanOrEqual(10); - await page.waitForTimeout(2000); + await runSessionPricing(STORE_TYPE); await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); @@ -105,8 +109,11 @@ test.describe('SessionAwarePricer E2E', () => { await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE); - await page.waitForTimeout(1500); + await page.waitForTimeout(1000); await 
humanLikeViewProduct(page, STORE_TYPE); + + await runSessionPricing(STORE_TYPE); + const finalPrice = await getPriceFromDOM(page); expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3); diff --git a/tests/e2e/scenarios/surge-pricing.spec.ts b/tests/e2e/scenarios/surge-pricing.spec.ts index e3e2f8d..26d29d3 100644 --- a/tests/e2e/scenarios/surge-pricing.spec.ts +++ b/tests/e2e/scenarios/surge-pricing.spec.ts @@ -7,6 +7,7 @@ import { verifySessionConsistency, } from '../helpers/interactions'; import { waitForInteractionEvent, countProductViews } from '../helpers/kafka'; +import { runSurgePricing } from '../helpers/airflow'; test.describe('SimpleSurgePricer E2E', () => { const STORE_TYPE = 'hotel'; @@ -29,7 +30,7 @@ test.describe('SimpleSurgePricer E2E', () => { await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE); - await page.waitForTimeout(2000); + await page.waitForTimeout(1000); const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page'); expect(evt).not.toBeNull(); @@ -37,6 +38,8 @@ test.describe('SimpleSurgePricer E2E', () => { const viewCount = await countProductViews(backendUrl, productId); expect(viewCount).toBeGreaterThanOrEqual(5); + await runSurgePricing(STORE_TYPE, 3, 1); + await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); const surgedPrice = await getPriceFromDOM(page); @@ -72,7 +75,9 @@ test.describe('SimpleSurgePricer E2E', () => { await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE); - await page.waitForTimeout(1500); + await page.waitForTimeout(1000); + + await runSurgePricing(STORE_TYPE, 3, 1); await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); @@ -81,6 +86,8 @@ test.describe('SimpleSurgePricer E2E', () => { await page.waitForTimeout(12000); + await runSurgePricing(STORE_TYPE, 3, 1); + await page.goto(`/products/${productId}`); await page.waitForLoadState('networkidle'); const decayedPrice = await getPriceFromDOM(page); 
From e89cb263d49375f0b0a628810d41c86617ae5386 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Mon, 12 Jan 2026 20:59:09 +0100 Subject: [PATCH 13/35] planning --- .../airflow/dags/surge_pricing_factory.py | 10 +++++++ experiments/procesing/pricers/base.py | 29 +++++++++---------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/experiments/airflow/dags/surge_pricing_factory.py b/experiments/airflow/dags/surge_pricing_factory.py index a886d5b..b61e65c 100644 --- a/experiments/airflow/dags/surge_pricing_factory.py +++ b/experiments/airflow/dags/surge_pricing_factory.py @@ -1,3 +1,4 @@ +from pandas.core.algorithms import factorize_array from airflow import DAG from airflow.operators.python import PythonOperator from airflow.utils.dates import days_ago @@ -208,3 +209,12 @@ def create_surge_pricing_dag(store_mode: str) -> DAG: # instantiate DAGs for Airflow to discover dag_airline = create_surge_pricing_dag('airline') dag_hotel = create_surge_pricing_dag('hotel') + +# TODO: Refactor this factory from a surge pricing factory to a general pricing factory +# We will do this by passing a pricing strategy class to the factory, since the generic pipeline is: +# take all interaction data, group by sessionId and assign a new price vector to each session +# in the grouping we get a subset of the interactions per sessionId and we can map that to some Features +# we define a custom _get_features(interactions .) methodin the strategy class +# we then run only the inference which is the .predict(trajectory) per-session which will give us a new price vector +# this we then publish for each sessionId group +# this might include no deleting most of the pricers we have defined and starting with a super simple surge-pricing algorithm that is no-fit only predict. This we can then test end-to-end and observe changes to prices according to a desired strategy - we have to define this one as a very short term strategy because we run sessions that take only a few minutes. 
diff --git a/experiments/procesing/pricers/base.py b/experiments/procesing/pricers/base.py index 6569556..ecaabed 100644 --- a/experiments/procesing/pricers/base.py +++ b/experiments/procesing/pricers/base.py @@ -7,15 +7,6 @@ import pandas as pd class PricingFunction(ABC): """ Abstract base for pricing functions. - - Defines mapping: f(Q_t, P_t, S_t, H_t) -> P_{t+1} - - Where: - Q_t ∈ R^n: demand vector at time t - P_t ∈ R^n: price vector at time t - S_t: session features (behavioral signals, interactions) - H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory - Objective: maximize E[R_T] = E[Σ P_t^T · Q_t] subject to: @@ -28,10 +19,10 @@ class PricingFunction(ABC): def fit(self, *kwargs): """ Offline training on historical data. + This is where we can think about some maximization of expected revenue + over historical trajectories to learn parameters of the pricing function. + (This however we cover move in the RL side of things) - Args: - historical_data: DataFrame with elasticity, prices, demand signals - **kwargs: additional training parameters """ pass @@ -39,12 +30,18 @@ class PricingFunction(ABC): def predict(self, *kwargs) -> np.ndarray: """ Generate optimal prices given current state. + This is an abstract method that transitions from τ -> P* + which is the mapping from the trajectory to optimal prices under + some subset of session grouping (so, per sessionId) + """ + pass - Args: - state_space: StateSpace object containing Q_t, P_t, S_t, H_t - + @abstractmethod + def _get_features(self, *kwargs) -> np.ndarray: + """ + Extract features from trajectory for pricing decision. 
Returns: - P_{t+1}: price vector in R^n + np.ndarray of shape (n_products, n_features) """ pass From 3c141a4b6c5d13bc4078807c381082fbc7ce625d Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Mon, 12 Jan 2026 22:33:47 +0100 Subject: [PATCH 14/35] chore: better test consistency before agnet --- docker-compose.yml | 20 ++++++++-- experiments/procesing/pricers/elasticity.py | 10 +++++ .../procesing/pricers/session_aware.py | 39 +++++++++++++++++++ experiments/procesing/pricers/simple.py | 23 +++++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 561c393..ba2e8a3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -112,11 +112,14 @@ services: depends_on: - postgres environment: - - AIRFLOW__CORE__EXECUTOR=SequentialExecutor + - AIRFLOW__CORE__EXECUTOR=LocalExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true + - AIRFLOW__CORE__PARALLELISM=16 + - AIRFLOW__CORE__DAG_CONCURRENCY=8 + - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4 - _AIRFLOW_DB_MIGRATE=true - _AIRFLOW_WWW_USER_CREATE=true - _AIRFLOW_WWW_USER_USERNAME=admin @@ -136,12 +139,17 @@ services: - airflow-init - redis environment: - - AIRFLOW__CORE__EXECUTOR=SequentialExecutor + - AIRFLOW__CORE__EXECUTOR=LocalExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true - AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true + - AIRFLOW__CORE__PARALLELISM=16 + - AIRFLOW__CORE__DAG_CONCURRENCY=8 + - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4 + - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30 + - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60 - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true - 
AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth @@ -174,12 +182,18 @@ services: redis: condition: service_started environment: - - AIRFLOW__CORE__EXECUTOR=SequentialExecutor + - AIRFLOW__CORE__EXECUTOR=LocalExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true - AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true + - AIRFLOW__CORE__PARALLELISM=16 + - AIRFLOW__CORE__DAG_CONCURRENCY=8 + - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4 + - AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30 + - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60 + - AIRFLOW__SCHEDULER__PARSING_PROCESSES=2 - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} - AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth - KAFKA_HOST=kafka diff --git a/experiments/procesing/pricers/elasticity.py b/experiments/procesing/pricers/elasticity.py index b203159..3ce3b42 100644 --- a/experiments/procesing/pricers/elasticity.py +++ b/experiments/procesing/pricers/elasticity.py @@ -57,3 +57,13 @@ class ElasticityBasedPricer(PricingFunction): # enforce bounds prices = np.clip(prices, self.price_floor, self.price_ceil) return prices + + def _get_features(self, state_space=None) -> np.ndarray: + """Extract elasticity, demand, and demand deviation for each product""" + if state_space is None or self.elasticity is None: + n = len(self.elasticity) if self.elasticity is not None else 0 + return np.zeros((n, 3)) + + demand = np.asarray(state_space.demand) + demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6) + return np.column_stack([self.elasticity, demand, demand_dev]) diff --git a/experiments/procesing/pricers/session_aware.py b/experiments/procesing/pricers/session_aware.py index 40343a7..dbc859f 100644 --- 
a/experiments/procesing/pricers/session_aware.py +++ b/experiments/procesing/pricers/session_aware.py @@ -107,6 +107,36 @@ class SessionAwarePricer(PricingFunction): return prices + def _get_features(self, state_space=None) -> np.ndarray: + """Extract elasticity, demand, and session features""" + if state_space is None or self.elasticity is None: + n = len(self.elasticity) if self.elasticity is not None else 0 + return np.zeros((n, 5)) + + demand = np.asarray(state_space.demand) + n_products = len(demand) + + # extract session features + velocity = 0.0 + view_depth = 0.0 + cart_to_view = 0.0 + + if not state_space.session_features.empty: + sf = state_space.session_features.iloc[0] + velocity = sf.get('interaction_velocity', 0.0) + view_depth = sf.get('product_view_depth', 0.0) + cart_to_view = sf.get('cart_to_view_ratio', 0.0) + + # broadcast session features to all products + features = np.column_stack([ + self.elasticity, + demand, + np.full(n_products, velocity), + np.full(n_products, view_depth), + np.full(n_products, cart_to_view) + ]) + return features + class ProductSpecificSessionPricer(PricingFunction): """ @@ -170,3 +200,12 @@ class ProductSpecificSessionPricer(PricingFunction): prices = np.clip(base_prices, self.price_floor, self.price_ceil) return prices + + def _get_features(self, state_space=None) -> np.ndarray: + """Extract elasticity and demand features for product-specific pricing""" + if state_space is None or self.elasticity is None: + n = len(self.elasticity) if self.elasticity is not None else 0 + return np.zeros((n, 2)) + + demand = np.asarray(state_space.demand) + return np.column_stack([self.elasticity, demand]) diff --git a/experiments/procesing/pricers/simple.py b/experiments/procesing/pricers/simple.py index 1a03f9f..d7fa699 100644 --- a/experiments/procesing/pricers/simple.py +++ b/experiments/procesing/pricers/simple.py @@ -65,6 +65,11 @@ class StaticPricer(PricingFunction): raise ValueError("Must call fit() or provide base_prices in 
constructor") return self.base_prices.copy() + def _get_features(self, state_space=None) -> np.ndarray: + """Static pricer uses no features, returns empty array""" + n = len(self.base_prices) if self.base_prices is not None else 0 + return np.zeros((n, 0)) + class RandomPricer(PricingFunction): """Random pricing within bounds (for baseline comparison)""" @@ -87,6 +92,11 @@ class RandomPricer(PricingFunction): self.n_products = len(state_space.demand) return self.rng.uniform(self.price_min, self.price_max, size=self.n_products) + def _get_features(self, state_space=None) -> np.ndarray: + """Random pricer uses no features""" + n = self.n_products if self.n_products else 0 + return np.zeros((n, 0)) + class SimpleSurgePricer(PricingFunction): """ @@ -133,3 +143,16 @@ class SimpleSurgePricer(PricingFunction): new_prices[low_mask] *= self.discount_multiplier return new_prices + + def _get_features(self, state_space=None) -> np.ndarray: + """Extract demand and base price features for each product""" + if state_space is None: + n = len(self.base_prices) if self.base_prices is not None else 0 + return np.zeros((n, 2)) + + demand = np.asarray(state_space.demand) if hasattr(state_space, 'demand') else np.array([0]) + base = np.asarray(state_space.prices) if hasattr(state_space, 'prices') else self.base_prices + if base is None: + base = np.ones(len(demand)) * 99.99 + + return np.column_stack([demand, base]) From 4c368d48f2a489595a9c8bc0375f348568305782 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 15:05:33 +0100 Subject: [PATCH 15/35] chore: fixing visual bugs in cart --- web/src/app/cart/page.tsx | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/web/src/app/cart/page.tsx b/web/src/app/cart/page.tsx index 30ac3f2..dbcb30b 100644 --- a/web/src/app/cart/page.tsx +++ b/web/src/app/cart/page.tsx @@ -32,7 +32,8 @@ export default function CartPage() { {itemCount > 0 && ( @@ -42,7 +43,7 @@ export default function CartPage() { 
{itemCount === 0 ? ( ) : ( <> @@ -54,15 +55,11 @@ export default function CartPage() { > - - {item.type} -{item.type === 'hotel' && ({item.name}
-@@ -81,7 +78,8 @@ export default function CartPage() {{String(item.metadata.roomType)}
{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}
{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}
${item.price}
@@ -100,7 +98,7 @@ export default function CartPage() { dispatchInteraction('checkout_start', undefined, { total, itemCount }); window.location.href = '/checkout'; }} - className="w-full py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors" + className="btn-primary w-full" > Proceed to Checkout From 61dd621532fbe91eb5afccf01fbb87488e55978a Mon Sep 17 00:00:00 2001 From: Daniel RoselDate: Tue, 13 Jan 2026 15:09:52 +0100 Subject: [PATCH 16/35] chore: styling and title updates --- web/src/app/globals.css | 3 +++ web/src/app/layout.tsx | 4 ++-- web/src/components/ui/Navigation.tsx | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/web/src/app/globals.css b/web/src/app/globals.css index 4a5b0c9..457b974 100644 --- a/web/src/app/globals.css +++ b/web/src/app/globals.css @@ -8,6 +8,9 @@ --bg-secondary: #f5f5f5; --text-primary: #333333; --text-secondary: #666666; + --accent-primary: #007aff; + --accent-primary-hover: #0051d5; + --accent-primary-light: #e6f2ff; --spacing-sm: 8px; --spacing-md: 16px; --spacing-lg: 32px; diff --git a/web/src/app/layout.tsx b/web/src/app/layout.tsx index e9f9b63..5ff49ae 100644 --- a/web/src/app/layout.tsx +++ b/web/src/app/layout.tsx @@ -15,8 +15,8 @@ const geistMono = Geist_Mono({ }); export const metadata: Metadata = { - title: "Create Next App", - description: "Generated by create next app", + title: "Travel Booking Platform", + description: "Book flights and hotels with dynamic pricing", }; export default function RootLayout({ diff --git a/web/src/components/ui/Navigation.tsx b/web/src/components/ui/Navigation.tsx index 9d9d4cf..6f0ecbb 100644 --- a/web/src/components/ui/Navigation.tsx +++ b/web/src/components/ui/Navigation.tsx @@ -20,7 +20,7 @@ const NavLink = ({ href, children }: { href: string; children: React.ReactNode } href={href} className={`px-4 py-2 rounded-md transition-colors ${ isActive - ? 'bg-[var(--accent-primary)] font-semibold' + ? 
'bg-[var(--accent-primary)] text-white font-semibold' : 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]' }`} > From eb9506038096497a0377636ce082f1be2f9e6840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Alves=20R=C3=B6sel?= <60182044+velocitatem@users.noreply.github.com> Date: Tue, 13 Jan 2026 15:35:27 +0100 Subject: [PATCH 17/35] Pre run web refactors (#43) * chore: refactor date utilities * feat: improve images of hotel rooms * fix: adding date utils --- web/src/components/feats/hotel/HotelCard.tsx | 5 +- .../components/feats/hotel/HotelDetails.tsx | 5 +- web/src/lib/airline-utils.ts | 24 +-------- web/src/lib/date-utils.ts | 23 ++++++++ web/src/lib/hotel-utils.ts | 52 +++++++++++-------- 5 files changed, 60 insertions(+), 49 deletions(-) create mode 100644 web/src/lib/date-utils.ts diff --git a/web/src/components/feats/hotel/HotelCard.tsx b/web/src/components/feats/hotel/HotelCard.tsx index 5bf234d..847e1b2 100644 --- a/web/src/components/feats/hotel/HotelCard.tsx +++ b/web/src/components/feats/hotel/HotelCard.tsx @@ -2,6 +2,7 @@ import type { EventName } from '@/lib/events'; import type { Hotel } from '@/lib/hotel-utils'; +import { getHotelImageUrl } from '@/lib/hotel-utils'; import { useHoverTracking } from '@/hooks/useHoverTracking'; import PriceDisplay from '@/components/ui/PriceDisplay'; @@ -47,8 +48,6 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) { window.location.href = `/hotel/products/${hotel.id}`; }; - const imageUrl = `https://images.unsplash.com/photo-1551882547-ff40c63fe5fa?w=400&h=300&fit=crop`; - return ( { diff --git a/web/src/components/feats/hotel/HotelDetails.tsx b/web/src/components/feats/hotel/HotelDetails.tsx index 6cdbbdd..030769f 100644 --- a/web/src/components/feats/hotel/HotelDetails.tsx +++ b/web/src/components/feats/hotel/HotelDetails.tsx @@ -2,6 +2,7 @@ import { useState, useEffect } from 'react'; import type { Hotel } from '@/lib/hotel-utils'; +import { getHotelImageUrl } from 
'@/lib/hotel-utils'; import PriceDisplay from '@/components/ui/PriceDisplay'; interface HotelDetailsProps { @@ -43,13 +44,11 @@ const PriceTotalDisplay = ({ productId, nights }: { productId: string; nights: n }; export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) { - const imageUrl = `https://images.unsplash.com/photo-1566073771259-6a8506099945?w=800&h=600&fit=crop`; - return (
{ diff --git a/web/src/lib/airline-utils.ts b/web/src/lib/airline-utils.ts index 74a1916..b801e14 100644 --- a/web/src/lib/airline-utils.ts +++ b/web/src/lib/airline-utils.ts @@ -31,7 +31,7 @@ export interface Flight { availability: number; } -const EPOCH = new Date(0); +import { dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils'; export const transformProduct = (p: AirlineProduct): Flight => { const { id, flight_type, date_index, metadata, availability } = p; @@ -52,24 +52,4 @@ export const transformProduct = (p: AirlineProduct): Flight => { }; }; -// convert date string to days from today -export const dateToDaysFromToday = (dateStr: string): number => { - const target = new Date(dateStr); - target.setHours(0, 0, 0, 0); - const today = new Date(); - today.setHours(0, 0, 0, 0); - return Math.floor((target.getTime() - today.getTime()) / 86400000); -}; - -// convert date string to date_index (days since epoch) -export const dateToIndex = (dateStr: string): number => { - const d = new Date(dateStr); - return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000); -}; - -// get current date_index -export const todayIndex = (): number => { - const now = new Date(); - now.setHours(0, 0, 0, 0); - return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000); -}; +export { dateToDaysFromToday, dateToIndex, todayIndex }; diff --git a/web/src/lib/date-utils.ts b/web/src/lib/date-utils.ts new file mode 100644 index 0000000..bad1a90 --- /dev/null +++ b/web/src/lib/date-utils.ts @@ -0,0 +1,23 @@ +const EPOCH = new Date(0); +const MS_PER_DAY = 86400000; + +export const dateToDaysFromToday = (dateStr: string): number => { + const target = new Date(dateStr); + target.setHours(0, 0, 0, 0); + const today = new Date(); + today.setHours(0, 0, 0, 0); + return Math.floor((target.getTime() - today.getTime()) / MS_PER_DAY); +}; + +export const dateToIndex = (dateStr: string): number => { + const d = new Date(dateStr); + return Math.floor((d.getTime() - EPOCH.getTime()) 
/ MS_PER_DAY); +}; + +export const todayIndex = (): number => { + const now = new Date(); + now.setHours(0, 0, 0, 0); + return Math.floor((now.getTime() - EPOCH.getTime()) / MS_PER_DAY); +}; + +export { EPOCH, MS_PER_DAY }; diff --git a/web/src/lib/hotel-utils.ts b/web/src/lib/hotel-utils.ts index b59994a..e5ba5c2 100644 --- a/web/src/lib/hotel-utils.ts +++ b/web/src/lib/hotel-utils.ts @@ -25,7 +25,7 @@ export interface Hotel { nights: number; } -const EPOCH = new Date(0); +import { EPOCH, MS_PER_DAY, dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils'; export const transformProduct = (p: HotelProduct): Hotel => { const { id, room_type, date_index, metadata } = p; @@ -37,14 +37,14 @@ export const transformProduct = (p: HotelProduct): Hotel => { // legacy: treat as offset from today const today = new Date(); today.setHours(0, 0, 0, 0); - checkIn = new Date(today.getTime() + date_index * 86400000); + checkIn = new Date(today.getTime() + date_index * MS_PER_DAY); } else { // proper: days since epoch - checkIn = new Date(EPOCH.getTime() + date_index * 86400000); + checkIn = new Date(EPOCH.getTime() + date_index * MS_PER_DAY); } const nights = 1; - const checkOut = new Date(checkIn.getTime() + nights * 86400000); + const checkOut = new Date(checkIn.getTime() + nights * MS_PER_DAY); const formatOpts: Intl.DateTimeFormatOptions = { month: 'short', @@ -65,24 +65,34 @@ export const transformProduct = (p: HotelProduct): Hotel => { }; }; -// convert date string to days from today -export const dateToDaysFromToday = (dateStr: string): number => { - const target = new Date(dateStr); - target.setHours(0, 0, 0, 0); - const today = new Date(); - today.setHours(0, 0, 0, 0); - return Math.floor((target.getTime() - today.getTime()) / 86400000); +const hotelImagePool = [ + 'photo-1566073771259-6a8506099945', + 'photo-1551882547-ff40c63fe5fa', + 'photo-1590490360182-c33d57733427', + 'photo-1582719478250-c89cae4dc85b', + 'photo-1596701062351-8c2c14d1fdd0', + 
'photo-1631049307264-da0ec9d70304', + 'photo-1578683010236-d716f9a3f461', + 'photo-1540518614846-7eded433c457', + 'photo-1505693416388-ac5ce068fe85', + 'photo-1522771739844-6a9f6d5f14af', + 'photo-1562438668-bcf0ca6578f0', + 'photo-1595576508898-0ad5c879a061', +]; + +const hashString = (s: string): number => { + let h = 0; + for (let i = 0; i < s.length; i++) { + h = ((h << 5) - h) + s.charCodeAt(i); + h = h & h; + } + return Math.abs(h); }; -// convert date string to date_index (days since epoch) -export const dateToIndex = (dateStr: string): number => { - const d = new Date(dateStr); - return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000); +export const getHotelImageUrl = (hotelId: string, size: { w: number; h: number } = { w: 400, h: 300 }): string => { + const idx = hashString(hotelId) % hotelImagePool.length; + const photoId = hotelImagePool[idx]; + return `https://images.unsplash.com/${photoId}?w=${size.w}&h=${size.h}&fit=crop`; }; -// get current date_index -export const todayIndex = (): number => { - const now = new Date(); - now.setHours(0, 0, 0, 0); - return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000); -}; +export { dateToDaysFromToday, dateToIndex, todayIndex }; From 7c330a19c698340341131561934653dc9e109d33 Mon Sep 17 00:00:00 2001 From: Daniel Rosel
Date: Tue, 13 Jan 2026 15:36:20 +0100 Subject: [PATCH 18/35] feat: added a runner script for agent orchestration --- experiments/agents/run.py | 117 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 experiments/agents/run.py diff --git a/experiments/agents/run.py b/experiments/agents/run.py new file mode 100644 index 0000000..823c3d9 --- /dev/null +++ b/experiments/agents/run.py @@ -0,0 +1,117 @@ +from supabase import create_client, Client +import os +import random +import asyncio +import json +from dotenv import load_dotenv + +from experiments.agents.agent import get_agent, AgentTypes +from lib.kafka_client import get_interactions + +load_dotenv() + +RESULTS="/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" + +client = create_client( + os.getenv("NEXT_PUBLIC_SUPABASE_URL"), + os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY") +) +def pick_random_task(): + mode = 'hotel' + tasks = client.table("tasks").select("*").execute().data + if mode == 'hotel': + # drop all that have 'flight' in the description + tasks = [task for task in tasks if 'flight' not in task['task_description'].lower()] + return random.choice(tasks) if tasks else None + +def clear_kafka_data(): + """Delete and recreate Kafka topics to clear all data""" + from kafka.admin import KafkaAdminClient, NewTopic + from kafka.errors import UnknownTopicOrPartitionError + import time + + kafka_host = os.getenv('KAFKA_HOST', 'localhost') + kafka_port = os.getenv('KAFKA_PORT', '9092') + broker = f'{kafka_host}:{kafka_port}' + + admin = KafkaAdminClient(bootstrap_servers=broker) + topics = ['user-interactions', 'price-logs'] + + try: + admin.delete_topics(topics, timeout_ms=5000) + print(f"Deleted topics: {topics}") + time.sleep(2) + except UnknownTopicOrPartitionError: + print("Topics don't exist, skipping delete") + except Exception as e: + print(f"Error deleting topics: {e}") + + new_topics = [ + NewTopic(name='user-interactions', 
num_partitions=3, replication_factor=1), + NewTopic(name='price-logs', num_partitions=3, replication_factor=1) + ] + + try: + admin.create_topics(new_topics=new_topics, validate_only=False) + print(f"Recreated topics: {topics}") + except Exception as e: + print(f"Error creating topics: {e}") + finally: + admin.close() + +def create_new_experiment(task_id): + import uuid + subject_name = f"agent_{str(uuid.uuid4())[:8]}" + experiment = { + "subject_name": subject_name, + "xp_human_only": False, + "xp_market_mode": "hotel", + "xp_task_id": task_id, + } + response = client.table("experiments").insert(experiment).execute() + return response.data[0] if response.data else None + +if __name__ == "__main__": + clear_kafka_data() + + task = pick_random_task() + if not task: + print("No tasks available") + exit(1) + + experiment = create_new_experiment(task['id']) + exp_id = experiment['id'] + exp_dir = f"{RESULTS}{exp_id}" + os.makedirs(exp_dir, exist_ok=True) + + # construct experiment URL with uuid param + base_url = os.getenv('NEXT_PUBLIC_API_BASE', 'http://localhost:3000') + agent_url = f"{base_url}/start-task?uuid={exp_id}" + + print(f"Created experiment {exp_id} for task {task['id']}") + print(f"Agent will interact with: {agent_url}") + + # instantiate and run agent + agent = get_agent( + AgentTypes.GENERIC_BROWSER_USE_AGENT, + goal=task['task_description'], + url=agent_url, + timeout=300, + headless=True + ) + + result = asyncio.run(agent.act()) + print(f"Agent result: {result}") + + # export interaction and price data from kafka + interactions = get_interactions(topic='user-interactions', timeout_ms=3000) + prices = get_interactions(topic='price-logs', timeout_ms=3000) + + with open(f"{exp_dir}/int.json", 'w') as f: + json.dump(interactions, f, indent=2) + + with open(f"{exp_dir}/price.json", 'w') as f: + json.dump(prices, f, indent=2) + + print(f"Experiment {exp_id} completed.") + print(f"Exported {len(interactions)} interactions and {len(prices)} price logs to 
{exp_dir}") From 9cb2b0fc4431f1a10af457d8fc17e1bb6e706032 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 15:37:06 +0100 Subject: [PATCH 19/35] feat: forgot airflow helper staging --- tests/e2e/helpers/airflow.ts | 61 ++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/e2e/helpers/airflow.ts diff --git a/tests/e2e/helpers/airflow.ts b/tests/e2e/helpers/airflow.ts new file mode 100644 index 0000000..82d4a75 --- /dev/null +++ b/tests/e2e/helpers/airflow.ts @@ -0,0 +1,61 @@ +const AIRFLOW_URL = process.env.AIRFLOW_URL || 'http://localhost:8085'; +const AUTH = 'Basic ' + Buffer.from(`${process.env.AIRFLOW_USER || 'admin'}:${process.env.AIRFLOW_PASS || 'admin'}`).toString('base64'); + +const req = (path: string, opts: any = {}) => { + const headers = { Authorization: AUTH, ...opts.headers }; + return fetch(`${AIRFLOW_URL}${path}`, { ...opts, headers }); +}; + +export const triggerDag = async (dagId: string, conf = {}) => { + const r = await req(`/api/v1/dags/${dagId}/dagRuns`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ conf }), + }); + if (!r.ok) throw new Error(`Trigger DAG failed: ${r.status}`); + return (await r.json()).dag_run_id; +}; + +export const getDagStatus = async (dagId: string, runId: string) => { + const r = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`); + if (!r.ok) throw new Error(`Get status failed: ${r.status}`); + return (await r.json()).state; +}; + +export const cancelDag = async (dagId: string, runId: string) => { + const r = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ state: 'failed' }), + }); + if (!r.ok) console.warn(`Failed to cancel DAG ${runId}: ${r.status}`); +}; + +export const waitForDag = async (dagId: string, runId: string, maxMs = 30000, pollMs = 1000) => { + const t0 = Date.now(); + while (Date.now() - t0 < maxMs) 
{ + const state = await getDagStatus(dagId, runId); + if (state === 'success') return; + if (state === 'failed') throw new Error(`DAG ${runId} failed`); + await new Promise(r => setTimeout(r, pollMs)); + } + await cancelDag(dagId, runId); + throw new Error(`DAG ${runId} timeout`); +}; + +export const runDag = async (dagId: string, conf = {}, maxMs = 60000) => { + const runId = await triggerDag(dagId, conf); + await waitForDag(dagId, runId, maxMs); +}; + +export const runSessionPricing = (mode = 'hotel') => + runDag('session_pricing_pipeline', { store_mode: mode, session_limit: 10 }, 90000); + +export const runSurgePricing = (mode = 'hotel', highThresh = 10, lowThresh = 2) => + runDag('surge_pricing_pipeline', { + store_mode: mode, + high_threshold: highThresh, + low_threshold: lowThresh, + surge_multiplier: 1.2, + discount_multiplier: 0.9 + }, 90000); From af23d2f736a854fe2f835addc3112c31d3d38e30 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 15:57:05 +0100 Subject: [PATCH 20/35] feat: introduction of agentinc MDPs and KL divergence of > 2 --- sim/rl/behavior_loader/loader.py | 20 +++++++ sim/rl/behavior_loader/models.py | 89 ++++++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 11 deletions(-) diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py index 99a1541..bd18442 100644 --- a/sim/rl/behavior_loader/loader.py +++ b/sim/rl/behavior_loader/loader.py @@ -56,7 +56,27 @@ class Loader: def get_entries(self) -> tuple[list[str], int]: return self.entries, len(self.entries) +class AgentLoader(Loader): + """Loader for agent interaction data with simplified schema (direct PayloadModel format)""" + + def _is_admin_page_simple(self, interaction: PayloadModel) -> bool: + return interaction.page and interaction.page.startswith("/admin/") + + def _load_sessions(self) -> dict: + sessions = {} + for entry in self.entries: + int_path = f"{self.src_dir}/{entry}/int.json" + raw = json.load(open(int_path)) + ints = 
[PayloadModel(**i) for i in raw] + sessions[entry] = [i for i in ints if not self._is_admin_page_simple(i)] + return sessions + if __name__ == "__main__": + DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" + loader = AgentLoader(DIR) + _, n = loader.get_entries() + print(f"Loaded {n} sessions from {DIR}") + DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" loader = Loader(DIR) _, n = loader.get_entries() diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index bce2429..7254606 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -1,10 +1,12 @@ -from loader import Loader +from experiments.agents.base import Agent +from loader import Loader, AgentLoader from collections import defaultdict from typing import Dict, List, Tuple, Set import numpy as np import graphviz DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" +AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" class BehaviorModel: def __init__(self, src_dir: str = DIR): @@ -85,13 +87,32 @@ class BehaviorModel: path.append(curr) return path -def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False): - """visualize MDP as directed graph using graphviz, aggregated by event type""" - if not model.mdp: raise ValueError("build MDP first") +class AgentBehaviorModel(BehaviorModel): + """behavior model for agent interaction data (simplified PayloadModel schema)""" - # aggregate transitions by event type + def __init__(self, src_dir: str = AGENT_DIR): + self.loader = AgentLoader(src_dir) + self.data = self.loader.get_data() + self.entries, self.num_entries = self.loader.get_entries() + self.mdp = None + + def _state_repr(self, evt) -> str: + # direct access to PayloadModel fields (no .value.payload nesting) + return 
f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}" + + def _extract_sessions(self): + trajectories = [] + for sid, evts in self.data.items(): + if len(evts) < 2: continue + # sort by timestamp string (ISO format sorts lexicographically) + states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)] + trajectories.append(states) + return trajectories + +def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]: + """aggregate state transitions by event type and normalize""" evt_trans = defaultdict(lambda: defaultdict(float)) - for s, trans in model.mdp['transitions'].items(): + for s, trans in mdp['transitions'].items(): evt_src = s.split('|')[2] for s_next, prob in trans.items(): evt_dst = s_next.split('|')[2] @@ -103,6 +124,13 @@ def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = " if total > 0: for evt_dst in evt_trans[evt_src]: evt_trans[evt_src][evt_dst] /= total + return dict(evt_trans) + +def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False): + """visualize MDP as directed graph using graphviz, aggregated by event type""" + if not model.mdp: raise ValueError("build MDP first") + + evt_trans = aggregate_event_transitions(model.mdp) g = graphviz.Digraph(format=fmt) g.attr(rankdir='LR', size='30') @@ -134,11 +162,50 @@ def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = " return g + +def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float: + """Compute KL divergence D_KL(P || Q) for discrete distributions P and Q.""" + epsilon = 1e-10 # small constant to avoid log(0) + kl_div = 0.0 + for key in p: + p_val = p[key] + epsilon + q_val = q.get(key, 0.0) + epsilon + kl_div += p_val * np.log(p_val / q_val) + return kl_div + if __name__ == "__main__": - model = BehaviorModel(DIR) - mdp = model.build_MDP() - print(f"Built MDP: {mdp['num_states']} states, 
{sum(len(t) for t in mdp['transitions'].values())} transitions") - if not mdp['states']: + human_model = BehaviorModel(DIR) + human_mdp = human_model.build_MDP() + print(f"Built MDP: {human_mdp['num_states']} states, {sum(len(t) for t in human_mdp['transitions'].values())} transitions") + if not human_mdp['states']: print("No states found") exit(1) - visualize_mdp(model, threshold=0.05, output="mdp_viz", fmt="pdf", export_dot=True) + visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True) + + agent_model = AgentBehaviorModel() + agent_mdp = agent_model.build_MDP() + print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, {sum(len(t) for t in agent_mdp['transitions'].values())} transitions") + if not agent_mdp['states']: + print("No states found") + exit(1) + visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True) + + # aggregate transitions by event type for both models + human_evt_trans = aggregate_event_transitions(human_mdp) + agent_evt_trans = aggregate_event_transitions(agent_mdp) + + common_evts = set(human_evt_trans.keys()) & set(agent_evt_trans.keys()) + if not common_evts: import sys; sys.exit("No common event types for KL divergence analysis") + + kl_divs = [] + for evt in common_evts: + kl = kl_divergence(human_evt_trans[evt], agent_evt_trans[evt]) + kl_divs.append((evt, kl)) + + kl_divs.sort(key=lambda x: x[1], reverse=True) + avg_kl = np.mean([kl for _, kl in kl_divs]) + + print(f"Average KL divergence: {avg_kl:.4f}") + print(f"\nMost divergent event types:") + for evt, kl in kl_divs: + print(f" {evt}: {kl:.4f}") From 87a35fad2c9c0954de5332edf4a55b53ca6b7049 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 16:42:50 +0100 Subject: [PATCH 21/35] feat: joint loader --- sim/rl/behavior_loader/loader.py | 47 ++++++++++++++++++++++++++------ sim/rl/behavior_loader/models.py | 32 +++++++++++++++++++++- 2 files changed, 70 insertions(+), 9 deletions(-) diff 
--git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py index bd18442..620576c 100644 --- a/sim/rl/behavior_loader/loader.py +++ b/sim/rl/behavior_loader/loader.py @@ -71,13 +71,44 @@ class AgentLoader(Loader): sessions[entry] = [i for i in ints if not self._is_admin_page_simple(i)] return sessions -if __name__ == "__main__": - DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" - loader = AgentLoader(DIR) - _, n = loader.get_entries() - print(f"Loaded {n} sessions from {DIR}") +class JointLoader: + """Loader for combined human (Kafka) and agent (direct) data without discrimination""" - DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" - loader = Loader(DIR) + def __init__(self, human_dir: str, agent_dir: str): + self.human_dir = human_dir + self.agent_dir = agent_dir + self.human_loader = Loader(human_dir) + self.agent_loader = AgentLoader(agent_dir) + self.data = self._load_joint_sessions() + self.entries = list(self.data.keys()) + + def _load_joint_sessions(self) -> dict: + sessions = {} + # load human sessions (unwrap from Kafka format to PayloadModel) + for sid, evts in self.human_loader.get_data().items(): + sessions[f"human_{sid}"] = [evt.value.payload for evt in evts] + # load agent sessions (already PayloadModel) + for sid, evts in self.agent_loader.get_data().items(): + sessions[f"agent_{sid}"] = evts + return sessions + + def get_data(self) -> dict: + return self.data + + def get_entries(self) -> tuple[list[str], int]: + return self.entries, len(self.entries) + +if __name__ == "__main__": + AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" + loader = AgentLoader(AGENT_DIR) _, n = loader.get_entries() - print(f"Loaded {n} sessions from {DIR}") + print(f"Loaded {n} agent sessions from {AGENT_DIR}") + + HUMAN_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + loader = Loader(HUMAN_DIR) + _, n = 
loader.get_entries() + print(f"Loaded {n} human sessions from {HUMAN_DIR}") + + joint_loader = JointLoader(HUMAN_DIR, AGENT_DIR) + _, n = joint_loader.get_entries() + print(f"Loaded {n} total sessions (combined) from joint loader") diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index 7254606..46ac99d 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -1,5 +1,5 @@ from experiments.agents.base import Agent -from loader import Loader, AgentLoader +from loader import Loader, AgentLoader, JointLoader from collections import defaultdict from typing import Dict, List, Tuple, Set import numpy as np @@ -109,6 +109,28 @@ class AgentBehaviorModel(BehaviorModel): trajectories.append(states) return trajectories +class JointBehaviorModel(BehaviorModel): + """behavior model for combined human+agent data (flat PayloadModel distribution)""" + + def __init__(self, human_dir: str = DIR, agent_dir: str = AGENT_DIR): + self.loader = JointLoader(human_dir, agent_dir) + self.data = self.loader.get_data() + self.entries, self.num_entries = self.loader.get_entries() + self.mdp = None + + def _state_repr(self, evt) -> str: + # direct access to PayloadModel fields (JointLoader unwraps to PayloadModel) + return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}" + + def _extract_sessions(self): + trajectories = [] + for sid, evts in self.data.items(): + if len(evts) < 2: continue + # sort by timestamp string (ISO format sorts lexicographically) + states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)] + trajectories.append(states) + return trajectories + def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]: """aggregate state transitions by event type and normalize""" evt_trans = defaultdict(lambda: defaultdict(float)) @@ -209,3 +231,11 @@ if __name__ == "__main__": print(f"\nMost divergent event types:") for evt, kl in kl_divs: print(f" {evt}: {kl:.4f}") + + # build joint 
model (combined distribution) + print("\n=== Joint Model (Human + Agent Combined) ===") + joint_model = JointBehaviorModel() + joint_mdp = joint_model.build_MDP() + print(f"Built joint MDP: {joint_mdp['num_states']} states, {sum(len(t) for t in joint_mdp['transitions'].values())} transitions") + if joint_mdp['states']: + visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True) From 82b54428b7494858597dfd91acaa7733378362aa Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 16:46:17 +0100 Subject: [PATCH 22/35] chore: refactor the loader class --- sim/rl/behavior_loader/loader.py | 67 ++++++++++++-------------------- 1 file changed, 25 insertions(+), 42 deletions(-) diff --git a/sim/rl/behavior_loader/loader.py b/sim/rl/behavior_loader/loader.py index 620576c..3336956 100644 --- a/sim/rl/behavior_loader/loader.py +++ b/sim/rl/behavior_loader/loader.py @@ -1,6 +1,6 @@ import os -from pydantic import BaseModel as Base import json +from pydantic import BaseModel as Base class PayloadModel(Base): sessionId: str @@ -30,6 +30,9 @@ class InteractionModel(Base): key: dict value: ValueModel +def _is_admin(page: str | None) -> bool: + return page is not None and page.startswith("/admin/") + class Loader: def __init__(self, src_dir: str): self.src_dir = src_dir @@ -37,17 +40,13 @@ class Loader: if not self.entries: raise ValueError("empty directory") self.data = self._load_sessions() - def _is_admin_page(self, interaction: InteractionModel) -> bool: - page = interaction.value.payload.page - return page and page.startswith("/admin/") - def _load_sessions(self) -> dict: sessions = {} for entry in self.entries: - int_path = f"{self.src_dir}/{entry}/int.json" - raw = json.load(open(int_path)) + with open(f"{self.src_dir}/{entry}/int.json") as f: + raw = json.load(f) ints = [InteractionModel(**i) for i in raw] - sessions[entry] = [i for i in ints if not self._is_admin_page(i)] + sessions[entry] = [i for i in ints if not 
_is_admin(i.value.payload.page)] return sessions def get_data(self) -> dict: @@ -57,40 +56,29 @@ class Loader: return self.entries, len(self.entries) class AgentLoader(Loader): - """Loader for agent interaction data with simplified schema (direct PayloadModel format)""" - - def _is_admin_page_simple(self, interaction: PayloadModel) -> bool: - return interaction.page and interaction.page.startswith("/admin/") - def _load_sessions(self) -> dict: sessions = {} for entry in self.entries: - int_path = f"{self.src_dir}/{entry}/int.json" - raw = json.load(open(int_path)) + with open(f"{self.src_dir}/{entry}/int.json") as f: + raw = json.load(f) ints = [PayloadModel(**i) for i in raw] - sessions[entry] = [i for i in ints if not self._is_admin_page_simple(i)] + sessions[entry] = [i for i in ints if not _is_admin(i.page)] return sessions class JointLoader: - """Loader for combined human (Kafka) and agent (direct) data without discrimination""" - def __init__(self, human_dir: str, agent_dir: str): - self.human_dir = human_dir - self.agent_dir = agent_dir self.human_loader = Loader(human_dir) self.agent_loader = AgentLoader(agent_dir) - self.data = self._load_joint_sessions() + self.data = self._merge() self.entries = list(self.data.keys()) - def _load_joint_sessions(self) -> dict: - sessions = {} - # load human sessions (unwrap from Kafka format to PayloadModel) - for sid, evts in self.human_loader.get_data().items(): - sessions[f"human_{sid}"] = [evt.value.payload for evt in evts] - # load agent sessions (already PayloadModel) - for sid, evts in self.agent_loader.get_data().items(): - sessions[f"agent_{sid}"] = evts - return sessions + def _merge(self) -> dict: + return { + **{f"human_{sid}": [e.value.payload for e in evts] + for sid, evts in self.human_loader.get_data().items()}, + **{f"agent_{sid}": evts + for sid, evts in self.agent_loader.get_data().items()} + } def get_data(self) -> dict: return self.data @@ -99,16 +87,11 @@ class JointLoader: return self.entries, 
len(self.entries) if __name__ == "__main__": - AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" - loader = AgentLoader(AGENT_DIR) - _, n = loader.get_entries() - print(f"Loaded {n} agent sessions from {AGENT_DIR}") + agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" + human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" - HUMAN_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" - loader = Loader(HUMAN_DIR) - _, n = loader.get_entries() - print(f"Loaded {n} human sessions from {HUMAN_DIR}") - - joint_loader = JointLoader(HUMAN_DIR, AGENT_DIR) - _, n = joint_loader.get_entries() - print(f"Loaded {n} total sessions (combined) from joint loader") + for name, cls, path in [("agent", AgentLoader, agent_dir), + ("human", Loader, human_dir), + ("joint", lambda d: JointLoader(human_dir, d), agent_dir)]: + ldr = cls(path) if name != "joint" else cls(agent_dir) + print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions") From e9cf5f07367e3ad85b94caaf038eb7a0e6f8d852 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 16:51:00 +0100 Subject: [PATCH 23/35] refactor models computations --- sim/rl/behavior_loader/models.py | 186 ++++++++++++------------------- 1 file changed, 69 insertions(+), 117 deletions(-) diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index 46ac99d..84c2fe4 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -1,16 +1,12 @@ -from experiments.agents.base import Agent from loader import Loader, AgentLoader, JointLoader from collections import defaultdict from typing import Dict, List, Tuple, Set import numpy as np import graphviz -DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" -AGENT_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" - class BehaviorModel: 
- def __init__(self, src_dir: str = DIR): - self.loader = Loader(src_dir) + def __init__(self, src_dir: str, loader_cls=Loader): + self.loader = loader_cls(src_dir) self.data = self.loader.get_data() self.entries, self.num_entries = self.loader.get_entries() self.mdp = None @@ -19,50 +15,48 @@ class BehaviorModel: p = evt.value.payload return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}" - def _extract_sessions(self): - # transform raw events into sequential state trajectories per session - trajectories = [] - for sid, evts in self.data.items(): - if len(evts) < 2: continue - states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.timestamp)] - trajectories.append(states) - return trajectories + def _sort_key(self, evt): + return evt.timestamp - def _calc_transitions(self, trajectories: List[List[str]]) -> Tuple[Dict, Set]: - trans = defaultdict(lambda: defaultdict(int)) - states = set() - for traj in trajectories: - for i in range(len(traj) - 1): - s, s_next = traj[i], traj[i+1] + def _extract_sessions(self) -> List[List[str]]: + trajs = [] + for evts in self.data.values(): + if len(evts) < 2: continue + states = [self._state_repr(e) for e in sorted(evts, key=self._sort_key)] + trajs.append(states) + return trajs + + def _calc_transitions(self, trajs: List[List[str]]) -> Tuple[Dict, Set]: + trans, states = defaultdict(lambda: defaultdict(int)), set() + for traj in trajs: + for s, s_next in zip(traj, traj[1:]): trans[s][s_next] += 1 states.update([s, s_next]) return trans, states - def _calc_rewards(self, trajectories: List[List[str]]) -> Dict: - # reward based on session progression depth + def _calc_rewards(self, trajs: List[List[str]]) -> Dict: rwd = defaultdict(list) - for traj in trajectories: + for traj in trajs: n = len(traj) for i, s in enumerate(traj): rwd[s].append(i / n) return rwd - def _normalize_trans(self, counts: Dict) -> Dict: + def _normalize_trans(self, cnts: Dict) -> Dict: return {s: {s_n: cnt/sum(nxt.values()) for s_n, 
cnt in nxt.items()} - for s, nxt in counts.items()} + for s, nxt in cnts.items()} def build_MDP(self) -> Dict: trajs = self._extract_sessions() trans_cnt, states = self._calc_transitions(trajs) trans_prob = self._normalize_trans(trans_cnt) state_rwd = self._calc_rewards(trajs) - state_val = {s: np.mean(r) for s, r in state_rwd.items()} self.mdp = { - 'states': sorted(list(states)), + 'states': sorted(states), 'num_states': len(states), 'transitions': trans_prob, - 'state_values': state_val, + 'state_values': {s: np.mean(r) for s, r in state_rwd.items()}, 'state_rewards': state_rwd, 'trans_counts': trans_cnt, } @@ -78,8 +72,7 @@ class BehaviorModel: def sample_traj(self, start: str, max_len: int = 50) -> List[str]: if not self.mdp: raise ValueError("build MDP first") - path = [start] - curr = start + path, curr = [start], start for _ in range(max_len): nxt = self.mdp['transitions'].get(curr, {}) if not nxt: break @@ -88,154 +81,113 @@ class BehaviorModel: return path class AgentBehaviorModel(BehaviorModel): - """behavior model for agent interaction data (simplified PayloadModel schema)""" - - def __init__(self, src_dir: str = AGENT_DIR): - self.loader = AgentLoader(src_dir) - self.data = self.loader.get_data() - self.entries, self.num_entries = self.loader.get_entries() - self.mdp = None + def __init__(self, src_dir: str): + super().__init__(src_dir, AgentLoader) def _state_repr(self, evt) -> str: - # direct access to PayloadModel fields (no .value.payload nesting) return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}" - def _extract_sessions(self): - trajectories = [] - for sid, evts in self.data.items(): - if len(evts) < 2: continue - # sort by timestamp string (ISO format sorts lexicographically) - states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)] - trajectories.append(states) - return trajectories + def _sort_key(self, evt): + return evt.ts class JointBehaviorModel(BehaviorModel): - """behavior model for combined 
human+agent data (flat PayloadModel distribution)""" - - def __init__(self, human_dir: str = DIR, agent_dir: str = AGENT_DIR): + def __init__(self, human_dir: str, agent_dir: str): self.loader = JointLoader(human_dir, agent_dir) self.data = self.loader.get_data() self.entries, self.num_entries = self.loader.get_entries() self.mdp = None def _state_repr(self, evt) -> str: - # direct access to PayloadModel fields (JointLoader unwraps to PayloadModel) return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}" - def _extract_sessions(self): - trajectories = [] - for sid, evts in self.data.items(): - if len(evts) < 2: continue - # sort by timestamp string (ISO format sorts lexicographically) - states = [self._state_repr(e) for e in sorted(evts, key=lambda x: x.ts)] - trajectories.append(states) - return trajectories + def _sort_key(self, evt): + return evt.ts def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]: - """aggregate state transitions by event type and normalize""" evt_trans = defaultdict(lambda: defaultdict(float)) for s, trans in mdp['transitions'].items(): - evt_src = s.split('|')[2] + src = s.split('|')[2] for s_next, prob in trans.items(): - evt_dst = s_next.split('|')[2] - evt_trans[evt_src][evt_dst] += prob + dst = s_next.split('|')[2] + evt_trans[src][dst] += prob - # normalize aggregated transitions - for evt_src in evt_trans: - total = sum(evt_trans[evt_src].values()) + for src in evt_trans: + total = sum(evt_trans[src].values()) if total > 0: - for evt_dst in evt_trans[evt_src]: - evt_trans[evt_src][evt_dst] /= total + evt_trans[src] = {dst: p/total for dst, p in evt_trans[src].items()} return dict(evt_trans) -def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph", fmt: str = "svg", view: bool = False, export_dot: bool = False): - """visualize MDP as directed graph using graphviz, aggregated by event type""" +def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: 
str = "mdp_graph", + fmt: str = "svg", view: bool = False, export_dot: bool = False): if not model.mdp: raise ValueError("build MDP first") evt_trans = aggregate_event_transitions(model.mdp) - g = graphviz.Digraph(format=fmt) g.attr(rankdir='LR', size='30') g.attr('node', shape='circle', width='1', height='1') - # collect all event types - events = set(evt_trans.keys()) - for trans in evt_trans.values(): - events.update(trans.keys()) - - # add nodes for each event type + events = set(evt_trans.keys()) | {e for trans in evt_trans.values() for e in trans.keys()} for evt in events: g.node(evt) - # add edges above threshold - for evt_src in evt_trans: - for evt_dst, prob in evt_trans[evt_src].items(): + for src, dsts in evt_trans.items(): + for dst, prob in dsts.items(): if prob > threshold: - g.edge(evt_src, evt_dst, label=f'{prob:.2f}') + g.edge(src, dst, label=f'{prob:.2f}') g.render(output, view=view, cleanup=True) print(f"Saved MDP graph to {output}.{fmt}") if export_dot: - dot_file = f"{output}.dot" - with open(dot_file, 'w') as f: + with open(f"{output}.dot", 'w') as f: f.write(g.source) - print(f"Exported DOT source to {dot_file}") + print(f"Exported DOT source to {output}.dot") return g - def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float: - """Compute KL divergence D_KL(P || Q) for discrete distributions P and Q.""" - epsilon = 1e-10 # small constant to avoid log(0) - kl_div = 0.0 - for key in p: - p_val = p[key] + epsilon - q_val = q.get(key, 0.0) + epsilon - kl_div += p_val * np.log(p_val / q_val) - return kl_div + eps = 1e-10 + return sum((p[k] + eps) * np.log((p[k] + eps) / (q.get(k, 0.0) + eps)) for k in p) if __name__ == "__main__": - human_model = BehaviorModel(DIR) + base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments" + human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/" + + human_model = BehaviorModel(human_dir) human_mdp = human_model.build_MDP() - print(f"Built MDP: 
{human_mdp['num_states']} states, {sum(len(t) for t in human_mdp['transitions'].values())} transitions") + print(f"Built MDP: {human_mdp['num_states']} states, " + f"{sum(len(t) for t in human_mdp['transitions'].values())} transitions") if not human_mdp['states']: - print("No states found") - exit(1) + exit("No states found") visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True) - agent_model = AgentBehaviorModel() + agent_model = AgentBehaviorModel(agent_dir) agent_mdp = agent_model.build_MDP() - print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, {sum(len(t) for t in agent_mdp['transitions'].values())} transitions") + print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, " + f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions") if not agent_mdp['states']: - print("No states found") - exit(1) + exit("No states found") visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True) - # aggregate transitions by event type for both models - human_evt_trans = aggregate_event_transitions(human_mdp) - agent_evt_trans = aggregate_event_transitions(agent_mdp) + human_evt = aggregate_event_transitions(human_mdp) + agent_evt = aggregate_event_transitions(agent_mdp) + common = set(human_evt.keys()) & set(agent_evt.keys()) - common_evts = set(human_evt_trans.keys()) & set(agent_evt_trans.keys()) - if not common_evts: import sys; sys.exit("No common event types for KL divergence analysis") + if not common: + exit("No common event types for KL divergence analysis") - kl_divs = [] - for evt in common_evts: - kl = kl_divergence(human_evt_trans[evt], agent_evt_trans[evt]) - kl_divs.append((evt, kl)) + kl_divs = sorted([(e, kl_divergence(human_evt[e], agent_evt[e])) for e in common], + key=lambda x: x[1], reverse=True) - kl_divs.sort(key=lambda x: x[1], reverse=True) - avg_kl = np.mean([kl for _, kl in kl_divs]) - - print(f"Average KL divergence: {avg_kl:.4f}") - 
print(f"\nMost divergent event types:") + print(f"Average KL divergence: {np.mean([kl for _, kl in kl_divs]):.4f}") + print("\nMost divergent event types:") for evt, kl in kl_divs: print(f" {evt}: {kl:.4f}") - # build joint model (combined distribution) print("\n=== Joint Model (Human + Agent Combined) ===") - joint_model = JointBehaviorModel() + joint_model = JointBehaviorModel(human_dir, agent_dir) joint_mdp = joint_model.build_MDP() - print(f"Built joint MDP: {joint_mdp['num_states']} states, {sum(len(t) for t in joint_mdp['transitions'].values())} transitions") + print(f"Built joint MDP: {joint_mdp['num_states']} states, " + f"{sum(len(t) for t in joint_mdp['transitions'].values())} transitions") if joint_mdp['states']: visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True) From 0ce12fbc3beb086f627b1269e470170f76c319d2 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 13 Jan 2026 19:50:36 +0100 Subject: [PATCH 24/35] chore: ignores --- .gitignore | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 9db7742..ef6746f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,18 +5,22 @@ **/.virtual_documents/ **/session_*.svg **/*graph.svg -paper/src/bib/auto +**/auto/*.el +*.old +**/package-lock.json +**/*.parquet -# Airflow logs - exclude DAG run logs +paper/src/auto/* +paper/src/bib/auto +docs/goals/*.md +PHANTOM.wiki/ experiments/airflow/logs/* experiments/airflow/logs/scheduler/ experiments/airflow/logs/dag_processor_manager/ -experiments/collected_data/* - -paper/src/auto/* -lib/ -docs/goals/*.md -PHANTOM.wiki/ +experiments/collected_data/ +experiments/agents/collected_data/ +sim/rl/behavior_loader/*.dot +sim/rl/behavior_loader/*.png +sim/rl/behavior_loader/*.svg +sim/rl/behavior_loader/*.pdf tests/e2e/node_modules/** -**/auto/*.el -*.old From 7b2d80ac4c96f4583028e4a049265be459662a72 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Tue, 20 Jan 2026 
21:00:47 +0100 Subject: [PATCH 25/35] feat: wip contaminator --- experiments/procesing/contaminator.py | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 experiments/procesing/contaminator.py diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py new file mode 100644 index 0000000..0a3651d --- /dev/null +++ b/experiments/procesing/contaminator.py @@ -0,0 +1,44 @@ +import pandas as pd +import random +from sim.rl.behavior_loader import AgentBehaviorModel + +base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments" +human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/" + + + +def remap_schema(df : pd.DataFrame, mapping: dict, on: str = "event_type"): + df = df.copy() + df[on] = df[on].map(mapping).fillna(df[on]) + return df + + +def contaminate_dataset(df : pd.DataFrame, on : str = "event_type", + contamination_rate: float = 0.1) -> pd.DataFrame: + model = AgentBehaviorModel(agent_dir) + target_df_schema = df[on].unique().tolist() + mapping = { + 'view': 'view_page' + # TODO: define properly for the given dataset + } + OG_event_distribution = df[on].value_counts(normalize=True).to_dict() + # normalize to weights + OG_event_distribution = {k: v / sum(OG_event_distribution.values()) for k, v in OG_event_distribution.items()} + mapped_df = remap_schema(df, mapping, on=on) + N = len(df) + N_final = N / (1 - contamination_rate) # TODO: explain this in paper + N_contaminate = int(N_final - N) + start_event_types = random.choices(list(OG_event_distribution.keys()), + weights=list(OG_event_distribution.values()), k=N_contaminate) + # it makes sense + new_trajectories = [] + for start_event in start_event_types: + # sample from og start + start = None # TODO: defin start accoding to dataset (randomly sample with weights of event distr) + trajectory = model.sample_trajectory(start) # TODO: explain this method in paper + new_trajectories.extend(trajectory) + + # 
TODO: make sure the new trajctories schema conforms with dataset + contaminate_df = pd.DataFrame(new_trajectories) + df = pd.concat([df, contaminate_df], ignore_index=True) + return df From b2f0746c01585a4fc6189feed7b0244be4d5be3b Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 11:11:49 +0100 Subject: [PATCH 26/35] chore: extra commenting --- experiments/procesing/contaminator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py index 0a3651d..da44c3d 100644 --- a/experiments/procesing/contaminator.py +++ b/experiments/procesing/contaminator.py @@ -1,9 +1,9 @@ import pandas as pd import random -from sim.rl.behavior_loader import AgentBehaviorModel +from sim.rl.behavior_loader import AgentBehaviorModel # TODO: proper import this base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments" -human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/" +agent_dir = f"{base_dir}/agents/collected_data/" @@ -21,6 +21,7 @@ def contaminate_dataset(df : pd.DataFrame, on : str = "event_type", 'view': 'view_page' # TODO: define properly for the given dataset } + # think about replacing with freqdist method from library OG_event_distribution = df[on].value_counts(normalize=True).to_dict() # normalize to weights OG_event_distribution = {k: v / sum(OG_event_distribution.values()) for k, v in OG_event_distribution.items()} From 04907df393149c61ff3efd3ffcdfa37bed2d8db5 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 11:27:03 +0100 Subject: [PATCH 27/35] feat: weak train scaffold --- experiments/ml/arch.py | 117 +++-------------------------------- experiments/ml/weak.train.py | 30 +++++++++ 2 files changed, 39 insertions(+), 108 deletions(-) create mode 100644 experiments/ml/weak.train.py diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py index 4f36e18..a187959 100644 --- a/experiments/ml/arch.py +++ 
b/experiments/ml/arch.py @@ -12,111 +12,12 @@ TASK = 'classification' LABELS = ['human', 'agent'] -class BaseAgentClassifier(BaseEstimator, ClassifierMixin, ABC): - """Base class for tree-based agent detection classifiers with common logic""" - - def __init__(self, context: Optional[PipelineContext] = None, n_estimators: int = 200, - max_depth: int = 6, learning_rate: float = 0.05, - early_stopping_rounds: int = 20): - self.context = context - self.n_estimators = n_estimators - self.max_depth = max_depth - self.learning_rate = learning_rate - self.early_stopping_rounds = early_stopping_rounds - self.model_ = None - self.feature_names_ = None - - def _to_array(self, X): - """Convert pandas structures to numpy arrays""" - return X.values if isinstance(X, (pd.DataFrame, pd.Series)) else X - - def _compute_pos_weight(self, y_arr): - """Calculate scale_pos_weight for class imbalance handling""" - n_neg, n_pos = (y_arr == 0).sum(), (y_arr == 1).sum() - return n_neg / n_pos if n_pos > 0 else 1.0 - - def _prepare_eval_set(self, eval_set): - """Convert eval_set to numpy arrays if needed""" - if not eval_set: - return None - X_val, y_val = eval_set[0] - return [(self._to_array(X_val), self._to_array(y_val))] - - @abstractmethod - def _build_model(self, scale_pos: float): - """Build the underlying model instance (must be implemented by subclasses)""" - pass - - @abstractmethod - def _fit_with_eval(self, X_arr, y_arr, eval_arr): - """Fit model with evaluation set (must be implemented by subclasses)""" - pass - - def fit(self, X, y, eval_set=None): - X_arr, y_arr = self._to_array(X), self._to_array(y) - - if isinstance(X, pd.DataFrame): - self.feature_names_ = X.columns.tolist() - - scale_pos = self._compute_pos_weight(y_arr) - self.model_ = self._build_model(scale_pos) - - eval_arr = self._prepare_eval_set(eval_set) - if eval_arr: - self._fit_with_eval(X_arr, y_arr, eval_arr) - else: - self.model_.fit(X_arr, y_arr) - - return self - - def predict(self, X): - return 
self.model_.predict(self._to_array(X)) - - def predict_proba(self, X): - return self.model_.predict_proba(self._to_array(X)) - - @property - def feature_importances_(self): - return self.model_.feature_importances_ if self.model_ else None - - -class XGBoostAgentClassifier(BaseAgentClassifier): - """XGBoost binary classifier for agent detection with class imbalance handling""" - - def _build_model(self, scale_pos: float): - return xgb.XGBClassifier( - n_estimators=self.n_estimators, - max_depth=self.max_depth, - learning_rate=self.learning_rate, - scale_pos_weight=scale_pos, - eval_metric='auc', - early_stopping_rounds=self.early_stopping_rounds, - random_state=42, - tree_method='hist', - enable_categorical=False - ) - - def _fit_with_eval(self, X_arr, y_arr, eval_arr): - self.model_.fit(X_arr, y_arr, eval_set=eval_arr, verbose=False) - - -class LightGBMAgentClassifier(BaseAgentClassifier): - """LightGBM binary classifier for agent detection with class imbalance handling""" - - def _build_model(self, scale_pos: float): - return lgb.LGBMClassifier( - n_estimators=self.n_estimators, - max_depth=self.max_depth, - learning_rate=self.learning_rate, - scale_pos_weight=scale_pos, - metric='auc', - random_state=42, - verbosity=-1 - ) - - def _fit_with_eval(self, X_arr, y_arr, eval_arr): - self.model_.fit( - X_arr, y_arr, - eval_set=eval_arr, - callbacks=[lgb.early_stopping(self.early_stopping_rounds, verbose=False)] - ) +class WeakClassifier(BaseEstimator, ClassifierMixin, ABC): + # a simple contrastive machine learning model + # this model should learn to distinguish between human and agent behavior + # using a weakly supervised approach and contrastive learning + augmentation + # + def __init__(self, **kwargs): + super().__init__() + self.model = None + self.kwargs = kwargs diff --git a/experiments/ml/weak.train.py b/experiments/ml/weak.train.py new file mode 100644 index 0000000..36e11ee --- /dev/null +++ b/experiments/ml/weak.train.py @@ -0,0 +1,30 @@ +from 
sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader +from sim.rl.behavior_loader.loader import PayloadModel +from arch import WeakClassifier + +agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" +human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + +def augment_trajectory(trajectory : list[PayloadModel], augmentation_rate: float = 0.1) -> list[PayloadModel]: + # augmentations possible: + # return a sub-trajectory window of the original trajectory + # insert random noise events + # shuffle a few events (find a few indices and swap them with i+1 neighbor) + # adjust metadata + return trajectory + + +def train(): + pass + + + +if __name__ == "__main__": + joint_loader = JointLoader(human_dir, agent_dir) + data = joint_loader.get_data() + entries, num_entries = joint_loader.get_entries() + print(f"Loaded {num_entries} entries") + # TODO: augment + # fit model + model = WeakClassifier() + model.fit(data) From b05b510f7098778c84ea84636f7958ded3e558d3 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 14:05:30 +0100 Subject: [PATCH 28/35] strong dataset gathering --- sim/strong_learner/data.py | 99 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 sim/strong_learner/data.py diff --git a/sim/strong_learner/data.py b/sim/strong_learner/data.py new file mode 100644 index 0000000..80129aa --- /dev/null +++ b/sim/strong_learner/data.py @@ -0,0 +1,99 @@ +import os, requests, py7zr +import pandas as pd +from typing import Generator +try: + from sim.rl.behavior_loader.loader import PayloadModel, ValueModel, InteractionModel, Loader +except ImportError: + from loader import PayloadModel, ValueModel, InteractionModel, Loader + +class YooChooseLoader(Loader): + URL = "https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z" + CLICK_COLS = ['session_id', 'ts', 'item_id', 'category'] + BUY_COLS = ['session_id', 'ts', 'item_id', 
'price', 'quantity'] + + def __init__(self, root_dir: str = "data/yoochoose", chunk_size: int = 500_000, max_sessions: int = 1000): + self.root = root_dir + self.chunk_size = chunk_size + self.max_sessions = max_sessions + self.click_path = f"{root_dir}/yoochoose-clicks.dat" + self.buy_path = f"{root_dir}/yoochoose-buys.dat" + if not os.path.exists(self.click_path): self._setup() + self.data = self._load_sessions(max_sessions) + self.entries = list(self.data.keys()) + + def _setup(self): + os.makedirs(self.root, exist_ok=True) + zip_path = f"{self.root}/temp.7z" + with requests.get(self.URL, stream=True) as r: + with open(zip_path, 'wb') as f: + for chunk in r.iter_content(8192): f.write(chunk) + with py7zr.SevenZipFile(zip_path, 'r') as z: z.extractall(self.root) + os.remove(zip_path) + + def _make_interaction(self, sid: str, ts: str, item_id: str, event: str, page: str, meta: dict) -> InteractionModel: + payload = PayloadModel( + sessionId=sid, experimentId=None, eventName=event, + page=page, productId=item_id, metadata=meta, + storeMode="yoochoose", userAgent="dataset", ts=ts + ) + return InteractionModel( + partitionID=0, offset=0, timestamp=0, compression="", + isTransactional=False, headers=[], key={}, + value=ValueModel(payload=payload, encoding="json", isPayloadNull=False, schemaId=1, size=0) + ) + + def _parse_category(self, cat) -> str: + if pd.isna(cat) or cat == "0": return "unknown" + if cat == "S": return "special_offer" + try: + n = int(cat) + return f"category_{n}" if 1 <= n <= 12 else f"brand_{n}" + except: return str(cat) + + def stream_clicks(self) -> Generator[InteractionModel, None, None]: + with pd.read_csv(self.click_path, names=self.CLICK_COLS, chunksize=self.chunk_size, header=None) as reader: + for chunk in reader: + for r in chunk.itertuples(index=False): + yield self._make_interaction( + str(r.session_id), r.ts, str(r.item_id), + "view_item_page", self._parse_category(r.category), {} + ) + + def stream_buys(self) -> 
Generator[InteractionModel, None, None]: + with pd.read_csv(self.buy_path, names=self.BUY_COLS, chunksize=self.chunk_size, header=None) as reader: + for chunk in reader: + for r in chunk.itertuples(index=False): + yield self._make_interaction( + str(r.session_id), r.ts, str(r.item_id), + "purchase_complete", "/checkout", {"price": r.price, "quantity": r.quantity} + ) + + def stream(self) -> Generator[InteractionModel, None, None]: + yield from self.stream_clicks() + yield from self.stream_buys() + + def _load_sessions(self, max_sessions: int | None = None) -> dict: + sessions = {} + for interaction in self.stream(): + sid = interaction.value.payload.sessionId + if sid not in sessions: + if max_sessions and len(sessions) >= max_sessions: continue + sessions[sid] = [] + sessions[sid].append(interaction) + for sid in sessions: sessions[sid].sort(key=lambda x: x.value.payload.ts) + return sessions + + def get_data(self) -> dict: + return self.data + + def get_entries(self) -> tuple[list[str], int]: + return self.entries, len(self.entries) + +if __name__ == "__main__": + loader = YooChooseLoader(max_sessions=100) + views, purchases = 0, 0 + for sid, evts in loader.get_data().items(): + for e in evts: + if e.value.payload.eventName == "view_item_page": views += 1 + elif e.value.payload.eventName == "purchase_complete": purchases += 1 + print(f"Loaded {len(loader.entries)} sessions: {views} view_item_page, {purchases} purchase_complete") From 440371dba40e39ce5159cb1edc21a899fcd0740b Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 14:05:39 +0100 Subject: [PATCH 29/35] feat: initial feature engineering of trajectories --- sim/rl/behavior_loader/models.py | 49 +++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index 84c2fe4..4c6bf21 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -1,4 +1,7 @@ -from loader 
import Loader, AgentLoader, JointLoader +try: + from loader import Loader, AgentLoader, JointLoader +except ImportError: + from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader from collections import defaultdict from typing import Dict, List, Tuple, Set import numpy as np @@ -80,6 +83,50 @@ class BehaviorModel: path.append(curr) return path + def extract_trajectory_features(self, events: List, max_trans_dim: int = 50) -> np.ndarray: + """Convert trajectory to feature vector using MDP structure for contrastive learning""" + if not self.mdp: + self.build_MDP() + + states = [self._state_repr(e) for e in sorted(events, key=self._sort_key)] + features = [] + + # transition histogram over MDP state space + trans_counts = defaultdict(int) + for s, s_next in zip(states, states[1:]): + trans_counts[(s, s_next)] += 1 + all_trans = [(s, t) for s in self.mdp['states'] for t in self.mdp['transitions'].get(s, {}).keys()] + trans_vec = [trans_counts.get(tr, 0) for tr in all_trans[:max_trans_dim]] + trans_vec = trans_vec + [0] * (max_trans_dim - len(trans_vec)) # pad + total_trans = sum(trans_counts.values()) or 1 + features.extend([v / total_trans for v in trans_vec]) + + # state coverage ratio + visited = set(states) + features.append(len(visited) / max(self.mdp['num_states'], 1)) + + # temporal entropy of transitions + if len(states) > 1: + trans_probs = [self.transition_prob(s, s_n) for s, s_n in zip(states, states[1:])] + entropy = -sum(p * np.log(p + 1e-10) for p in trans_probs if p > 0) + features.append(entropy / max(len(states), 1)) + else: + features.append(0.0) + + # trajectory length and unique state count + features.append(len(states)) + features.append(len(visited)) + + # state value statistics along trajectory + vals = [self.state_value(s) for s in states] + if vals: + features.extend([np.mean(vals), np.std(vals), np.min(vals), np.max(vals)]) + else: + features.extend([0.0, 0.0, 0.0, 0.0]) + + return np.array(features, dtype=np.float32) + + 
class AgentBehaviorModel(BehaviorModel): def __init__(self, src_dir: str): super().__init__(src_dir, AgentLoader) From 00e3eff2fadbc4b6153220971c68729464b8b46b Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 18:22:31 +0100 Subject: [PATCH 30/35] migrating weak learning --- experiments/ml/weak.train.py | 30 ----- experiments/ml/weak_train.py | 246 +++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 30 deletions(-) delete mode 100644 experiments/ml/weak.train.py create mode 100644 experiments/ml/weak_train.py diff --git a/experiments/ml/weak.train.py b/experiments/ml/weak.train.py deleted file mode 100644 index 36e11ee..0000000 --- a/experiments/ml/weak.train.py +++ /dev/null @@ -1,30 +0,0 @@ -from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader -from sim.rl.behavior_loader.loader import PayloadModel -from arch import WeakClassifier - -agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" -human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" - -def augment_trajectory(trajectory : list[PayloadModel], augmentation_rate: float = 0.1) -> list[PayloadModel]: - # augmentations possible: - # return a sub-trajectory window of the original trajectory - # insert random noise events - # shuffle a few events (find a few indices and swap them with i+1 neighbor) - # adjust metadata - return trajectory - - -def train(): - pass - - - -if __name__ == "__main__": - joint_loader = JointLoader(human_dir, agent_dir) - data = joint_loader.get_data() - entries, num_entries = joint_loader.get_entries() - print(f"Loaded {num_entries} entries") - # TODO: augment - # fit model - model = WeakClassifier() - model.fit(data) diff --git a/experiments/ml/weak_train.py b/experiments/ml/weak_train.py new file mode 100644 index 0000000..eb87a9c --- /dev/null +++ b/experiments/ml/weak_train.py @@ -0,0 +1,246 @@ +import sys +sys.path.insert(0, 
"/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader") +sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml") + +from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader, PayloadModel +from sim.rl.behavior_loader.models import JointBehaviorModel +from arch import ContrastiveWeakClassifier, contrastive_loss, featurize_trajectory +from typing import List, Optional, Dict +from datetime import datetime, timedelta +from copy import deepcopy +import numpy as np +import random +import torch +from torch.utils.data import Dataset, DataLoader +from torch.optim import Adam +from torch.utils.tensorboard import SummaryWriter + +RUNS_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml/runs" +agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/" +human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/" + + +def _perturb_ts(evt: PayloadModel, jitter_ms: int = 500) -> PayloadModel: + """Add random jitter to event timestamp""" + new_evt = deepcopy(evt) + try: + ts = datetime.fromisoformat(evt.ts.replace('Z', '+00:00')) + delta = timedelta(milliseconds=random.randint(-jitter_ms, jitter_ms)) + new_evt.ts = (ts + delta).isoformat() + except: + pass + return new_evt + + +def augment_trajectory(trajectory: List[PayloadModel], rate: float = 0.1) -> List[PayloadModel]: + """Apply random augmentation to trajectory for contrastive learning""" + if len(trajectory) < 2: + return trajectory + + aug_type = random.choice(['window', 'shuffle', 'noise', 'drop']) + + if aug_type == 'window': # random contiguous sub-sequence (70-100% length) + min_len = max(2, int(len(trajectory) * 0.7)) + sub_len = random.randint(min_len, len(trajectory)) + start = random.randint(0, len(trajectory) - sub_len) + return trajectory[start:start + sub_len] + + elif aug_type == 'shuffle': # swap adjacent pairs with probability rate + result = list(trajectory) + for i in 
range(len(result) - 1): + if random.random() < rate: + result[i], result[i + 1] = result[i + 1], result[i] + return result + + elif aug_type == 'drop': # drop events with probability rate + result = [e for e in trajectory if random.random() > rate] + return result if len(result) >= 2 else trajectory[:2] + + elif aug_type == 'noise': # perturb timestamps + return [_perturb_ts(e, jitter_ms=500) for e in trajectory] + + return trajectory + + +class TripletDataset(Dataset): + """Generate (anchor, positive, negative) triplets on-the-fly with augmentation""" + def __init__(self, data: Dict[str, List[PayloadModel]], mdp: Optional[Dict], augment_fn, input_dim: int = 64, multiplier: int = 10): + self.sessions = list(data.items()) + self.human_ids = [i for i, (sid, _) in enumerate(self.sessions) if sid.startswith('human_')] + self.agent_ids = [i for i, (sid, _) in enumerate(self.sessions) if sid.startswith('agent_')] + self.mdp = mdp + self.augment = augment_fn + self.input_dim = input_dim + self.multiplier = multiplier + + if not self.human_ids or not self.agent_ids: + raise ValueError(f"Need both human ({len(self.human_ids)}) and agent ({len(self.agent_ids)}) sessions") + + def __len__(self) -> int: + return len(self.sessions) * self.multiplier + + def __getitem__(self, idx: int): + anchor_idx = idx % len(self.sessions) + sid, events = self.sessions[anchor_idx] + is_human = sid.startswith('human_') + + anchor = featurize_trajectory(events, self.mdp, self.input_dim) + positive = featurize_trajectory(self.augment(events), self.mdp, self.input_dim) + + neg_pool = self.agent_ids if is_human else self.human_ids + neg_idx = random.choice(neg_pool) + negative = featurize_trajectory(self.sessions[neg_idx][1], self.mdp, self.input_dim) + + label = 0 if is_human else 1 # 0=human, 1=agent + return (torch.tensor(anchor, dtype=torch.float32), + torch.tensor(positive, dtype=torch.float32), + torch.tensor(negative, dtype=torch.float32), + torch.tensor(label, dtype=torch.long)) + + +def 
train(epochs: int = 100, lr: float = 1e-3, batch_size: int = 4, input_dim: int = 64, + embed_dim: int = 32, margin: float = 0.3, verbose: bool = True, run_name: str = None): + """Train contrastive weak classifier on human/agent trajectories""" + joint = JointLoader(human_dir, agent_dir) + data = joint.get_data() + if verbose: + print(f"Loaded {len(data)} sessions") + + joint_model = JointBehaviorModel(human_dir, agent_dir) + ref_mdp = joint_model.build_MDP() + + dataset = TripletDataset(data, ref_mdp, augment_trajectory, input_dim=input_dim) + loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True) + + model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin) + model.to_device() + + run_name = run_name or f"d{input_dim}_e{embed_dim}_lr{lr}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}" + writer = SummaryWriter(f"{RUNS_DIR}/train/{run_name}") + + optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr) + ce_loss_fn = torch.nn.CrossEntropyLoss() + + best_loss = float('inf') + for epoch in range(epochs): + model.encoder.train() + model.classifier.train() + total_loss, n_batches = 0.0, 0 + + for anchor, positive, negative, labels in loader: + anchor, positive, negative, labels = [t.to(model.device) for t in [anchor, positive, negative, labels]] + z_a, z_p, z_n = [model.encoder(t.unsqueeze(1)) for t in [anchor, positive, negative]] + + trip_loss = contrastive_loss(z_a, z_p, z_n, margin=model.margin) + ce = ce_loss_fn(model.classifier(z_a), labels) + loss = trip_loss + 0.5 * ce + + optimizer.zero_grad() + loss.backward() + optimizer.step() + total_loss += loss.item() + n_batches += 1 + + avg_loss = total_loss / max(n_batches, 1) + writer.add_scalar('loss', avg_loss, epoch) + + if verbose and (epoch + 1) % 10 == 0: + print(f"Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}") + if avg_loss < best_loss: + best_loss = avg_loss + + writer.close() + if verbose: + print(f"Done. 
Best={best_loss:.4f} TB:{RUNS_DIR}/train/{run_name}") + + return model, ref_mdp + + +def evaluate_loocv(input_dim: int = 64, embed_dim: int = 32, epochs_per_fold: int = 50, + lr: float = 1e-3, margin: float = 0.3, run_name: str = None): + """Leave-one-out cross-validation given limited samples""" + joint = JointLoader(human_dir, agent_dir) + data = joint.get_data() + session_ids = list(data.keys()) + + joint_model = JointBehaviorModel(human_dir, agent_dir) + ref_mdp = joint_model.build_MDP() + + run_name = run_name or f"loocv_d{input_dim}_e{embed_dim}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}" + writer = SummaryWriter(f"{RUNS_DIR}/eval/{run_name}") + + predictions, actuals = [], [] + + for fold_idx, test_sid in enumerate(session_ids): + train_data = {k: v for k, v in data.items() if k != test_sid} + test_events = data[test_sid] + test_label = 0 if test_sid.startswith('human_') else 1 + + n_human = sum(1 for k in train_data if k.startswith('human_')) + n_agent = sum(1 for k in train_data if k.startswith('agent_')) + if n_human == 0 or n_agent == 0: + continue + + try: + dataset = TripletDataset(train_data, ref_mdp, augment_trajectory, input_dim=input_dim, multiplier=5) + loader = DataLoader(dataset, batch_size=2, shuffle=True, drop_last=True) + + model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin) + model.to_device() + optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr) + + model.encoder.train() + model.classifier.train() + for _ in range(epochs_per_fold): + for anchor, positive, negative, labels in loader: + z_a, z_p, z_n = [model.encoder(t.unsqueeze(1).to(model.device)) for t in [anchor, positive, negative]] + loss = contrastive_loss(z_a, z_p, z_n, margin=margin) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + test_feat = featurize_trajectory(test_events, ref_mdp, input_dim) + pred = model.predict(test_feat.reshape(1, -1))[0] + predictions.append(pred) + 
actuals.append(test_label) + print(f" {test_sid[:12]}...: pred={pred}, actual={test_label}, {'OK' if pred == test_label else 'MISS'}") + + except Exception as e: + print(f"Error: {e}") + + if predictions: + acc = sum(p == a for p, a in zip(predictions, actuals)) / len(predictions) + tp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 1) + fp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 0) + fn = sum(1 for p, a in zip(predictions, actuals) if p == 0 and a == 1) + prec, rec = tp / max(tp + fp, 1), tp / max(tp + fn, 1) + f1 = 2 * prec * rec / max(prec + rec, 1e-10) + writer.add_scalar('accuracy', acc, 0) + writer.add_scalar('f1', f1, 0) + writer.add_scalar('precision', prec, 0) + writer.add_scalar('recall', rec, 0) + writer.close() + print(f"\nAccuracy: {acc:.2%} F1: {f1:.3f} TB:{RUNS_DIR}/eval/{run_name}") + return acc, predictions, actuals + writer.close() + return 0.0, [], [] + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--mode', choices=['train', 'eval'], default='train') + parser.add_argument('--epochs', type=int, default=100) + parser.add_argument('--lr', type=float, default=1e-3) + parser.add_argument('--margin', type=float, default=0.3) + parser.add_argument('--input-dim', type=int, default=64) + parser.add_argument('--embed-dim', type=int, default=32) + parser.add_argument('--run-name', type=str, default=None) + args = parser.parse_args() + + if args.mode == 'train': + model, mdp = train(epochs=args.epochs, lr=args.lr, input_dim=args.input_dim, + embed_dim=args.embed_dim, margin=args.margin, run_name=args.run_name) + else: + evaluate_loocv(input_dim=args.input_dim, embed_dim=args.embed_dim, epochs_per_fold=args.epochs, + lr=args.lr, margin=args.margin, run_name=args.run_name) From ccc19f349385511e3b0e9f0cb3a9290c11095bb3 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 18:22:39 +0100 Subject: [PATCH 31/35] acapting some architectures --- 
experiments/ml/__init__.py | 16 ++- experiments/ml/arch.py | 242 +++++++++++++++++++++++++++++++++++-- 2 files changed, 247 insertions(+), 11 deletions(-) diff --git a/experiments/ml/__init__.py b/experiments/ml/__init__.py index 11b65df..c97eaa9 100644 --- a/experiments/ml/__init__.py +++ b/experiments/ml/__init__.py @@ -1,11 +1,21 @@ from .evals import evaluate from .arch import ( XGBoostAgentClassifier, - LightGBMAgentClassifier + LightGBMAgentClassifier, + ContrastiveWeakClassifier, + TrajectoryEncoder, + WeakClassifier, + contrastive_loss, + featurize_trajectory, ) -__all__ =[ +__all__ = [ 'evaluate', 'XGBoostAgentClassifier', - 'LightGBMAgentClassifier' + 'LightGBMAgentClassifier', + 'ContrastiveWeakClassifier', + 'TrajectoryEncoder', + 'WeakClassifier', + 'contrastive_loss', + 'featurize_trajectory', ] diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py index a187959..4ceb2e0 100644 --- a/experiments/ml/arch.py +++ b/experiments/ml/arch.py @@ -1,23 +1,249 @@ # sklearn compatible models for agent detection from sklearn.base import BaseEstimator, ClassifierMixin -from procesing.context import PipelineContext -from typing import Any, Optional, Tuple +from typing import Any, Optional, Tuple, Dict, List from abc import ABC, abstractmethod -import xgboost as xgb -import lightgbm as lgb +from collections import defaultdict import numpy as np import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F TASK = 'classification' LABELS = ['human', 'agent'] class WeakClassifier(BaseEstimator, ClassifierMixin, ABC): - # a simple contrastive machine learning model - # this model should learn to distinguish between human and agent behavior - # using a weakly supervised approach and contrastive learning + augmentation - # + # a simple contrastive machine learning model learns to distinguish human/agent behavior + # using weakly supervised contrastive learning + augmentation def __init__(self, **kwargs): super().__init__() self.model = 
None self.kwargs = kwargs + + +class TrajectoryEncoder(nn.Module): + """Encode variable-length event sequences to fixed-dim embedding via bidirectional LSTM""" + def __init__(self, input_dim: int, embed_dim: int = 32, hidden_dim: int = 64): + super().__init__() + self.event_embed = nn.Linear(input_dim, hidden_dim) + self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True) + self.proj = nn.Linear(hidden_dim * 2, embed_dim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: # x: (batch, seq_len, input_dim) + h = F.relu(self.event_embed(x)) + _, (hn, _) = self.lstm(h) + hn = torch.cat([hn[-2], hn[-1]], dim=1) # concat bidirectional hidden states + return F.normalize(self.proj(hn), dim=1) # L2 normalized + + +class ContrastiveWeakClassifier(WeakClassifier): + """Contrastive learning classifier for human/agent trajectory discrimination""" + def __init__(self, input_dim: int = 64, embed_dim: int = 32, margin: float = 1.0, **kwargs): + super().__init__(**kwargs) + self.input_dim = input_dim + self.embed_dim = embed_dim + self.margin = margin + self.encoder = TrajectoryEncoder(input_dim, embed_dim) + self.classifier = nn.Linear(embed_dim, 2) + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self._fitted = False + + def to_device(self): + self.encoder.to(self.device) + self.classifier.to(self.device) + return self + + def encode(self, x: torch.Tensor) -> torch.Tensor: + return self.encoder(x.to(self.device)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + emb = self.encode(x) + return self.classifier(emb) + + def fit(self, X, y=None): # sklearn interface - actual training in weak.train.py + self._fitted = True + return self + + def predict(self, X: np.ndarray) -> np.ndarray: + self.encoder.eval() + self.classifier.eval() + with torch.no_grad(): + x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device) + logits = self.forward(x) + return torch.argmax(logits, dim=1).cpu().numpy() + + def 
predict_proba(self, X: np.ndarray) -> np.ndarray: + self.encoder.eval() + self.classifier.eval() + with torch.no_grad(): + x = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device) + logits = self.forward(x) + return F.softmax(logits, dim=1).cpu().numpy() + + +def contrastive_loss(anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor, margin: float = 0.3) -> torch.Tensor: + """Triplet loss using cosine similarity (for L2-normalized embeddings). margin in [0,1] range.""" + pos_sim = F.cosine_similarity(anchor, positive) # higher = more similar + neg_sim = F.cosine_similarity(anchor, negative) + return F.relu(neg_sim - pos_sim + margin).mean() # want pos_sim > neg_sim + margin + + +def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) -> torch.Tensor: + """Normalized temperature-scaled cross entropy loss (SimCLR style)""" + batch_size = z_i.size(0) + z = torch.cat([z_i, z_j], dim=0) # (2N, embed_dim) + sim = F.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0), dim=2) / temperature + mask = torch.eye(2 * batch_size, dtype=torch.bool, device=z.device) + sim.masked_fill_(mask, -float('inf')) + labels = torch.arange(batch_size, device=z.device) + labels = torch.cat([labels + batch_size, labels]) # positive pairs + return F.cross_entropy(sim, labels) + + +# feature extraction utilities for trajectory -> feature vector +def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray: + """Compute normalized histogram of state transitions in trajectory""" + if len(events) < 2: + return np.zeros(max_states) + states = [state_fn(e) for e in events] + trans_counts = defaultdict(int) + for s, s_next in zip(states, states[1:]): + trans_counts[(s, s_next)] += 1 + total = sum(trans_counts.values()) + hist = np.array(list(trans_counts.values())[:max_states], dtype=np.float32) + hist = np.pad(hist, (0, max(0, max_states - len(hist)))) + return hist / (total + 1e-10) + + +def temporal_signature(events: List, 
ts_fn) -> np.ndarray: + """Extract temporal features: mean/std/skew of inter-event times""" + if len(events) < 2: + return np.zeros(4, dtype=np.float32) + times = sorted([ts_fn(e) for e in events]) + diffs = np.diff(times).astype(np.float32) + if len(diffs) == 0: + return np.zeros(4, dtype=np.float32) + mean_dt, std_dt = np.mean(diffs), np.std(diffs) + 1e-10 + skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0 + return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32) + + +def state_coverage(events: List, state_fn, mdp_states: set) -> float: + """Fraction of MDP states visited by trajectory""" + if not mdp_states: + return 0.0 + visited = set(state_fn(e) for e in events) + return len(visited & mdp_states) / len(mdp_states) + + +def transition_entropy(events: List, state_fn) -> float: + """Compute entropy of transition distribution (randomness of navigation)""" + if len(events) < 2: + return 0.0 + states = [state_fn(e) for e in events] + trans_counts = defaultdict(int) + for s, s_next in zip(states, states[1:]): + trans_counts[(s, s_next)] += 1 + total = sum(trans_counts.values()) + probs = [c / total for c in trans_counts.values()] + return -sum(p * np.log(p + 1e-10) for p in probs) + + +def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray: + """Convert trajectory to fixed-dim feature vector""" + def _state_repr(e): + return f"{getattr(e, 'page', None) or 'unk'}|{getattr(e, 'productId', None) or 'none'}|{e.eventName}" + + def _ts_fn(e): + ts = getattr(e, 'ts', None) + if isinstance(ts, str): + from datetime import datetime + try: + return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp() + except: + return 0.0 + return float(ts) if ts else 0.0 + + feats = [] + feats.extend(transition_histogram(events, _state_repr, max_states=40)) # 40 dims + feats.extend(temporal_signature(events, _ts_fn)) # 4 dims + mdp_states = set(mdp.get('states', [])) if mdp else set() + 
feats.append(state_coverage(events, _state_repr, mdp_states)) # 1 dim + feats.append(transition_entropy(events, _state_repr)) # 1 dim + feats.append(len(events)) # trajectory length + feats.append(len(set(_state_repr(e) for e in events))) # unique states + + # event type distribution (page_view, hover, cart, purchase indicators) + event_names = [e.eventName for e in events] + feats.append(sum(1 for n in event_names if 'page' in n.lower()) / (len(events) + 1)) + feats.append(sum(1 for n in event_names if 'hover' in n.lower()) / (len(events) + 1)) + feats.append(sum(1 for n in event_names if 'cart' in n.lower()) / (len(events) + 1)) + feats.append(sum(1 for n in event_names if 'purchase' in n.lower() or 'checkout' in n.lower()) / (len(events) + 1)) + + # pad/truncate to input_dim + feats = np.array(feats[:input_dim], dtype=np.float32) + if len(feats) < input_dim: + feats = np.pad(feats, (0, input_dim - len(feats))) + return feats + + +# gradient boosting classifiers for comparison baselines +class XGBoostAgentClassifier(BaseEstimator, ClassifierMixin): + """XGBoost classifier for human/agent detection from session features""" + def __init__(self, n_estimators: int = 100, max_depth: int = 6, learning_rate: float = 0.1, **kwargs): + self.n_estimators = n_estimators + self.max_depth = max_depth + self.learning_rate = learning_rate + self.model = None + self.kwargs = kwargs + + def fit(self, X: np.ndarray, y: np.ndarray): + try: + import xgboost as xgb + self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, + learning_rate=self.learning_rate, **self.kwargs) + self.model.fit(X, y) + except ImportError: + raise ImportError("xgboost required for XGBoostAgentClassifier") + return self + + def predict(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict(X) + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the 
model first") + return self.model.predict_proba(X) + + +class LightGBMAgentClassifier(BaseEstimator, ClassifierMixin): + """LightGBM classifier for human/agent detection from session features""" + def __init__(self, n_estimators: int = 100, max_depth: int = -1, learning_rate: float = 0.1, **kwargs): + self.n_estimators = n_estimators + self.max_depth = max_depth + self.learning_rate = learning_rate + self.model = None + self.kwargs = kwargs + + def fit(self, X: np.ndarray, y: np.ndarray): + try: + import lightgbm as lgb + self.model = lgb.LGBMClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, + learning_rate=self.learning_rate, verbose=-1, **self.kwargs) + self.model.fit(X, y) + except ImportError: + raise ImportError("lightgbm required for LightGBMAgentClassifier") + return self + + def predict(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict(X) + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + if self.model is None: + raise ValueError("fit the model first") + return self.model.predict_proba(X) From 22a2c255bd23f8717275fe99a34a65253deed3c8 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 19:11:54 +0100 Subject: [PATCH 32/35] chore: remove boilerplate --- web/src/app/page.tsx | 64 ++------------------------------------------ 1 file changed, 2 insertions(+), 62 deletions(-) diff --git a/web/src/app/page.tsx b/web/src/app/page.tsx index 295f8fd..c97c8ed 100644 --- a/web/src/app/page.tsx +++ b/web/src/app/page.tsx @@ -1,65 +1,5 @@ -import Image from "next/image"; +import { redirect } from 'next/navigation'; export default function Home() { - return ( - -- ); + redirect('/hotel'); } From ee70f02a1f2feae8c52f02aeb8d61837a0ad1787 Mon Sep 17 00:00:00 2001 From: Daniel Rosel- -- -- -- To get started, edit the page.tsx file. -
-- Looking for a starting point or more instructions? Head over to{" "} - - Templates - {" "} - or the{" "} - - Learning - {" "} - center. -
-Date: Wed, 21 Jan 2026 19:12:11 +0100 Subject: [PATCH 33/35] chore: export repeated methods into lib --- lib/__init__.py | 41 +++++++++++++++ lib/config.py | 65 +++++++++++++++++++++++ lib/features.py | 125 ++++++++++++++++++++++++++++++++++++++++++++ lib/kafka_client.py | 54 +++++++++++++++++++ lib/state.py | 72 +++++++++++++++++++++++++ 5 files changed, 357 insertions(+) create mode 100644 lib/__init__.py create mode 100644 lib/config.py create mode 100644 lib/features.py create mode 100755 lib/kafka_client.py create mode 100644 lib/state.py diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000..7f8ec2d --- /dev/null +++ b/lib/__init__.py @@ -0,0 +1,41 @@ +"""PHANTOM shared library +Exports unified utilities for features, state, config, kafka, and model registry +""" +from .config import ( + PROJECT_ROOT, DATA_DIR, EXPERIMENTS_DIR, + AGENT_DATA_DIR, HUMAN_DATA_DIR, SIM_RUNS_DIR, MODEL_REGISTRY_DIR, + COLLECTED_DATA_DIR, NOTEBOOK_OUTPUT_DIR, + ensure_dir, get_data_path, get_experiments_path, get_sim_path, + KAFKA_HOST, KAFKA_PORT, KAFKA_BROKER, + REDIS_HOST, REDIS_PORT, + SUPABASE_URL, SUPABASE_ANON_KEY, + BACKEND_PORT, PROVIDER_PORT +) +from .state import ( + make_state_repr, event_to_state, parse_state, + get_event_name, get_timestamp, + create_state_fn, create_event_name_fn, create_timestamp_fn +) +from .features import ( + transition_histogram, temporal_signature, state_coverage, transition_entropy, + event_type_distribution, featurize_trajectory, parse_timestamp +) + +__all__ = [ + # config + 'PROJECT_ROOT', 'DATA_DIR', 'EXPERIMENTS_DIR', + 'AGENT_DATA_DIR', 'HUMAN_DATA_DIR', 'SIM_RUNS_DIR', 'MODEL_REGISTRY_DIR', + 'COLLECTED_DATA_DIR', 'NOTEBOOK_OUTPUT_DIR', + 'ensure_dir', 'get_data_path', 'get_experiments_path', 'get_sim_path', + 'KAFKA_HOST', 'KAFKA_PORT', 'KAFKA_BROKER', + 'REDIS_HOST', 'REDIS_PORT', + 'SUPABASE_URL', 'SUPABASE_ANON_KEY', + 'BACKEND_PORT', 'PROVIDER_PORT', + # state + 'make_state_repr', 'event_to_state', 
'parse_state', + 'get_event_name', 'get_timestamp', + 'create_state_fn', 'create_event_name_fn', 'create_timestamp_fn', + # features + 'transition_histogram', 'temporal_signature', 'state_coverage', 'transition_entropy', + 'event_type_distribution', 'featurize_trajectory', 'parse_timestamp', +] diff --git a/lib/config.py b/lib/config.py new file mode 100644 index 0000000..a27ffd9 --- /dev/null +++ b/lib/config.py @@ -0,0 +1,65 @@ +"""Unified path configuration for PHANTOM project +All hardcoded paths should reference this module +Paths can be overridden via environment variables +""" +import os +from pathlib import Path + +# project root (directory containing lib/, experiments/, sim/, web/, backend/) +PROJECT_ROOT = Path(__file__).parent.parent.resolve() + +# data directories +DATA_DIR = Path(os.getenv('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data')) +EXPERIMENTS_DIR = Path(os.getenv('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments')) + +# agent/human interaction data +AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', DATA_DIR / 'agents')) +HUMAN_DATA_DIR = Path(os.getenv('PHANTOM_HUMAN_DATA_DIR', DATA_DIR / 'humans')) + +# RL simulation runs +SIM_RUNS_DIR = Path(os.getenv('PHANTOM_SIM_RUNS_DIR', PROJECT_ROOT / 'sim' / 'rl' / 'runs')) + +# model artifacts +MODEL_REGISTRY_DIR = Path(os.getenv('PHANTOM_MODEL_REGISTRY_DIR', DATA_DIR / 'models')) + +# collected experiment data +COLLECTED_DATA_DIR = Path(os.getenv('PHANTOM_COLLECTED_DATA_DIR', EXPERIMENTS_DIR / 'agents' / 'collected_data')) + +# notebook outputs +NOTEBOOK_OUTPUT_DIR = Path(os.getenv('PHANTOM_NOTEBOOK_OUTPUT_DIR', EXPERIMENTS_DIR / 'notebooks' / 'outputs')) + + +def ensure_dir(path: Path) -> Path: + """ensure directory exists, create if needed""" + path.mkdir(parents=True, exist_ok=True) + return path + + +def get_data_path(*parts: str) -> Path: + """construct path relative to DATA_DIR""" + return DATA_DIR.joinpath(*parts) + + +def get_experiments_path(*parts: str) -> Path: + """construct path 
relative to EXPERIMENTS_DIR""" + return EXPERIMENTS_DIR.joinpath(*parts) + + +def get_sim_path(*parts: str) -> Path: + """construct path relative to SIM_RUNS_DIR""" + return SIM_RUNS_DIR.joinpath(*parts) + + +# service configuration (from .env) +KAFKA_HOST = os.getenv('KAFKA_HOST', 'localhost') +KAFKA_PORT = os.getenv('KAFKA_PORT', '9092') +KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}" + +REDIS_HOST = os.getenv('REDIS_HOST', 'localhost') +REDIS_PORT = int(os.getenv('REDIS_PORT', '6379')) + +SUPABASE_URL = os.getenv('NEXT_PUBLIC_SUPABASE_URL', '') +SUPABASE_ANON_KEY = os.getenv('NEXT_PUBLIC_SUPABASE_ANON_KEY', '') + +BACKEND_PORT = int(os.getenv('BACKEND_PORT', '5000')) +PROVIDER_PORT = int(os.getenv('PROVIDER_PORT', '5001')) diff --git a/lib/features.py b/lib/features.py new file mode 100644 index 0000000..f2d88f5 --- /dev/null +++ b/lib/features.py @@ -0,0 +1,125 @@ +"""Unified featurization utilities for trajectory -> feature vector conversion +Used by both experiments/ml/ and sim/rl/ components +""" +import numpy as np +from collections import defaultdict +from typing import List, Dict, Callable, Optional, Any, Set +from datetime import datetime + + +def transition_histogram(events: List, state_fn: Callable, max_states: int = 50) -> np.ndarray: + """compute normalized histogram of state transitions in trajectory + events: list of event objects/dicts + state_fn: function mapping event -> state string + max_states: maximum dimensions for histogram + """ + if len(events) < 2: + return np.zeros(max_states, dtype=np.float32) + states = [state_fn(e) for e in events] + trans_counts = defaultdict(int) + for s, s_next in zip(states, states[1:]): + trans_counts[(s, s_next)] += 1 + total = sum(trans_counts.values()) + hist = np.array(list(trans_counts.values())[:max_states], dtype=np.float32) + hist = np.pad(hist, (0, max(0, max_states - len(hist)))) + return hist / (total + 1e-10) + + +def temporal_signature(events: List, ts_fn: Callable) -> np.ndarray: + """extract 
temporal features: mean/std/skew of inter-event times plus count + events: list of event objects/dicts + ts_fn: function mapping event -> timestamp (float seconds) + returns: [mean_dt, std_dt, skew, n_intervals] array + """ + if len(events) < 2: + return np.zeros(4, dtype=np.float32) + times = sorted([ts_fn(e) for e in events]) + diffs = np.diff(times).astype(np.float32) + if len(diffs) == 0: + return np.zeros(4, dtype=np.float32) + mean_dt, std_dt = np.mean(diffs), np.std(diffs) + 1e-10 + skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0 + return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32) + + +def state_coverage(events: List, state_fn: Callable, mdp_states: Set[str]) -> float: + """fraction of MDP states visited by trajectory + events: list of event objects/dicts + state_fn: function mapping event -> state string + mdp_states: set of all possible MDP states + """ + if not mdp_states: + return 0.0 + visited = set(state_fn(e) for e in events) + return len(visited & mdp_states) / len(mdp_states) + + +def transition_entropy(events: List, state_fn: Callable) -> float: + """compute entropy of transition distribution (randomness of navigation) + higher entropy = more random browsing pattern + """ + if len(events) < 2: + return 0.0 + states = [state_fn(e) for e in events] + trans_counts = defaultdict(int) + for s, s_next in zip(states, states[1:]): + trans_counts[(s, s_next)] += 1 + total = sum(trans_counts.values()) + probs = [c / total for c in trans_counts.values()] + return -sum(p * np.log(p + 1e-10) for p in probs) + + +def event_type_distribution(events: List, event_name_fn: Callable) -> np.ndarray: + """compute proportions of different event type categories + returns: [page_view_ratio, hover_ratio, cart_ratio, purchase_ratio] + """ + if not events: + return np.zeros(4, dtype=np.float32) + n = len(events) + names = [event_name_fn(e).lower() for e in events] + return np.array([ + sum(1 for nm in names if 'page' in nm or 
'view' in nm) / n, + sum(1 for nm in names if 'hover' in nm) / n, + sum(1 for nm in names if 'cart' in nm) / n, + sum(1 for nm in names if 'purchase' in nm or 'checkout' in nm) / n + ], dtype=np.float32) + + +def featurize_trajectory(events: List, state_fn: Callable, ts_fn: Callable, + event_name_fn: Callable, mdp_states: Optional[Set[str]] = None, + output_dim: int = 64) -> np.ndarray: + """convert trajectory to fixed-dimension feature vector + events: list of event objects/dicts + state_fn: function mapping event -> state string + ts_fn: function mapping event -> timestamp (float) + event_name_fn: function mapping event -> event name string + mdp_states: optional set of all MDP states for coverage calculation + output_dim: desired output dimension (will pad/truncate) + """ + feats = [] + feats.extend(transition_histogram(events, state_fn, max_states=40)) # 40 dims + feats.extend(temporal_signature(events, ts_fn)) # 4 dims + feats.append(state_coverage(events, state_fn, mdp_states or set())) # 1 dim + feats.append(transition_entropy(events, state_fn)) # 1 dim + feats.append(float(len(events))) # trajectory length + feats.append(float(len(set(state_fn(e) for e in events)))) # unique states + feats.extend(event_type_distribution(events, event_name_fn)) # 4 dims + + feats = np.array(feats[:output_dim], dtype=np.float32) + if len(feats) < output_dim: + feats = np.pad(feats, (0, output_dim - len(feats))) + return feats + + +def parse_timestamp(ts: Any) -> float: + """parse various timestamp formats to float seconds""" + if ts is None: + return 0.0 + if isinstance(ts, (int, float)): + return float(ts) + if isinstance(ts, str): + try: + return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp() + except ValueError: + return 0.0 + return 0.0 diff --git a/lib/kafka_client.py b/lib/kafka_client.py new file mode 100755 index 0000000..d61cd9e --- /dev/null +++ b/lib/kafka_client.py @@ -0,0 +1,54 @@ +from kafka import KafkaConsumer +import json +import os +from 
dotenv import load_dotenv +load_dotenv() + +def get_interactions( + topic='user-interactions', + bootstrap_servers=None, + from_beginning=True, + max_records=None, + timeout_ms=5000 +): + """Consume interaction events from Kafka. + + Args: + topic: Kafka topic name + bootstrap_servers: Kafka broker address (default from env) + from_beginning: Start from earliest offset if True + max_records: Max number of records to fetch (None = all available) + timeout_ms: Consumer poll timeout + + Returns: + List of parsed interaction event dicts + """ + if not bootstrap_servers: + host = os.getenv('KAFKA_HOST', 'localhost') + port = os.getenv('KAFKA_PORT', '9092') + bootstrap_servers = f'{host}:{port}' + + consumer = KafkaConsumer( + topic, + bootstrap_servers=bootstrap_servers, + auto_offset_reset='earliest' if from_beginning else 'latest', + enable_auto_commit=False, + value_deserializer=lambda m: json.loads(m.decode('utf-8')), + consumer_timeout_ms=timeout_ms + ) + + events = [] + try: + for msg in consumer: + events.append(msg.value) + if max_records and len(events) >= max_records: + break + finally: + consumer.close() + + return events + +if __name__ == '__main__': + interactions = get_interactions(max_records=10) + for event in interactions: + print(event) diff --git a/lib/state.py b/lib/state.py new file mode 100644 index 0000000..cfb4251 --- /dev/null +++ b/lib/state.py @@ -0,0 +1,72 @@ +"""Unified state representation utilities for MDP state encoding +Used by both experiments/ and sim/ components for consistent state handling +""" +from typing import Any, Callable + + +def make_state_repr(page: str = None, product_id: str = None, event_name: str = None) -> str: + """create canonical state representation string from components + format: page|productId|eventName + """ + p = page or 'unk' + pid = product_id or 'none' + en = event_name or 'unknown' + return f"{p}|{pid}|{en}" + + +def event_to_state(evt: Any) -> str: + """convert event object/dict to state string + supports 
both object attributes and dict keys + """ + if isinstance(evt, dict): + return make_state_repr( + page=evt.get('page'), + product_id=evt.get('productId'), + event_name=evt.get('eventName') or evt.get('event_type') + ) + return make_state_repr( + page=getattr(evt, 'page', None), + product_id=getattr(evt, 'productId', None), + event_name=getattr(evt, 'eventName', None) or getattr(evt, 'event_type', None) + ) + + +def parse_state(state_str: str) -> dict: + """parse state string back to components + returns: {'page': str, 'productId': str, 'eventName': str} + """ + parts = state_str.split('|') + return { + 'page': parts[0] if len(parts) > 0 and parts[0] != 'unk' else None, + 'productId': parts[1] if len(parts) > 1 and parts[1] != 'none' else None, + 'eventName': parts[2] if len(parts) > 2 and parts[2] != 'unknown' else None + } + + +def get_event_name(evt: Any) -> str: + """extract event name from event object/dict""" + if isinstance(evt, dict): + return evt.get('eventName') or evt.get('event_type') or '' + return getattr(evt, 'eventName', None) or getattr(evt, 'event_type', None) or '' + + +def get_timestamp(evt: Any) -> Any: + """extract timestamp from event object/dict""" + if isinstance(evt, dict): + return evt.get('ts') or evt.get('timestamp') + return getattr(evt, 'ts', None) or getattr(evt, 'timestamp', None) + + +def create_state_fn() -> Callable: + """factory for state representation function""" + return event_to_state + + +def create_event_name_fn() -> Callable: + """factory for event name extraction function""" + return get_event_name + + +def create_timestamp_fn() -> Callable: + """factory for timestamp extraction function (returns raw value, use features.parse_timestamp to convert)""" + return get_timestamp From 0f5f8affab007789dcfad9aea52cf4f2791b41f1 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 19:12:35 +0100 Subject: [PATCH 34/35] chore: make lib backwards compatible --- experiments/ml/arch.py | 91 +++++----------- 
sim/rl/behavior_loader/models.py | 12 +++ sim/rl/environment.py | 175 +++++++++++++++---------------- 3 files changed, 126 insertions(+), 152 deletions(-) diff --git a/experiments/ml/arch.py b/experiments/ml/arch.py index 4ceb2e0..1fa4f96 100644 --- a/experiments/ml/arch.py +++ b/experiments/ml/arch.py @@ -8,6 +8,20 @@ import pandas as pd import torch import torch.nn as nn import torch.nn.functional as F +import sys +from pathlib import Path + +# add lib to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'lib')) +from lib.features import ( + transition_histogram as _lib_transition_histogram, + temporal_signature as _lib_temporal_signature, + state_coverage as _lib_state_coverage, + transition_entropy as _lib_transition_entropy, + featurize_trajectory as _lib_featurize_trajectory, + parse_timestamp +) +from lib.state import event_to_state, get_event_name, get_timestamp TASK = 'classification' LABELS = ['human', 'agent'] @@ -101,91 +115,40 @@ def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) return F.cross_entropy(sim, labels) -# feature extraction utilities for trajectory -> feature vector +# feature extraction utilities - delegating to lib.features for unified implementation +# these wrappers maintain backwards compatibility for existing imports + def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray: """Compute normalized histogram of state transitions in trajectory""" - if len(events) < 2: - return np.zeros(max_states) - states = [state_fn(e) for e in events] - trans_counts = defaultdict(int) - for s, s_next in zip(states, states[1:]): - trans_counts[(s, s_next)] += 1 - total = sum(trans_counts.values()) - hist = np.array(list(trans_counts.values())[:max_states], dtype=np.float32) - hist = np.pad(hist, (0, max(0, max_states - len(hist)))) - return hist / (total + 1e-10) + return _lib_transition_histogram(events, state_fn, max_states) def temporal_signature(events: List, 
ts_fn) -> np.ndarray: """Extract temporal features: mean/std/skew of inter-event times""" - if len(events) < 2: - return np.zeros(4, dtype=np.float32) - times = sorted([ts_fn(e) for e in events]) - diffs = np.diff(times).astype(np.float32) - if len(diffs) == 0: - return np.zeros(4, dtype=np.float32) - mean_dt, std_dt = np.mean(diffs), np.std(diffs) + 1e-10 - skew = np.mean(((diffs - mean_dt) / std_dt) ** 3) if std_dt > 1e-8 else 0.0 - return np.array([mean_dt, std_dt, skew, len(diffs)], dtype=np.float32) + return _lib_temporal_signature(events, ts_fn) def state_coverage(events: List, state_fn, mdp_states: set) -> float: """Fraction of MDP states visited by trajectory""" - if not mdp_states: - return 0.0 - visited = set(state_fn(e) for e in events) - return len(visited & mdp_states) / len(mdp_states) + return _lib_state_coverage(events, state_fn, mdp_states) def transition_entropy(events: List, state_fn) -> float: """Compute entropy of transition distribution (randomness of navigation)""" - if len(events) < 2: - return 0.0 - states = [state_fn(e) for e in events] - trans_counts = defaultdict(int) - for s, s_next in zip(states, states[1:]): - trans_counts[(s, s_next)] += 1 - total = sum(trans_counts.values()) - probs = [c / total for c in trans_counts.values()] - return -sum(p * np.log(p + 1e-10) for p in probs) + return _lib_transition_entropy(events, state_fn) def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray: - """Convert trajectory to fixed-dim feature vector""" - def _state_repr(e): - return f"{getattr(e, 'page', None) or 'unk'}|{getattr(e, 'productId', None) or 'none'}|{e.eventName}" + """Convert trajectory to fixed-dim feature vector - uses lib.features implementation""" + mdp_states = set(mdp.get('states', [])) if mdp else set() def _ts_fn(e): - ts = getattr(e, 'ts', None) - if isinstance(ts, str): - from datetime import datetime - try: - return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp() - 
except: - return 0.0 - return float(ts) if ts else 0.0 + return parse_timestamp(get_timestamp(e)) - feats = [] - feats.extend(transition_histogram(events, _state_repr, max_states=40)) # 40 dims - feats.extend(temporal_signature(events, _ts_fn)) # 4 dims - mdp_states = set(mdp.get('states', [])) if mdp else set() - feats.append(state_coverage(events, _state_repr, mdp_states)) # 1 dim - feats.append(transition_entropy(events, _state_repr)) # 1 dim - feats.append(len(events)) # trajectory length - feats.append(len(set(_state_repr(e) for e in events))) # unique states + def _event_name_fn(e): + return get_event_name(e) - # event type distribution (page_view, hover, cart, purchase indicators) - event_names = [e.eventName for e in events] - feats.append(sum(1 for n in event_names if 'page' in n.lower()) / (len(events) + 1)) - feats.append(sum(1 for n in event_names if 'hover' in n.lower()) / (len(events) + 1)) - feats.append(sum(1 for n in event_names if 'cart' in n.lower()) / (len(events) + 1)) - feats.append(sum(1 for n in event_names if 'purchase' in n.lower() or 'checkout' in n.lower()) / (len(events) + 1)) - - # pad/truncate to input_dim - feats = np.array(feats[:input_dim], dtype=np.float32) - if len(feats) < input_dim: - feats = np.pad(feats, (0, input_dim - len(feats))) - return feats + return _lib_featurize_trajectory(events, event_to_state, _ts_fn, _event_name_fn, mdp_states, input_dim) # gradient boosting classifiers for comparison baselines diff --git a/sim/rl/behavior_loader/models.py b/sim/rl/behavior_loader/models.py index 4c6bf21..3530724 100644 --- a/sim/rl/behavior_loader/models.py +++ b/sim/rl/behavior_loader/models.py @@ -6,6 +6,18 @@ from collections import defaultdict from typing import Dict, List, Tuple, Set import numpy as np import graphviz +import sys +from pathlib import Path + +# import lib utilities for optional use - models keep their own _state_repr for backwards compat +# with the specific event structure (evt.value.payload) 
+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / 'lib')) +try: + from lib.state import make_state_repr as lib_make_state_repr + from lib.features import transition_histogram as lib_transition_histogram +except ImportError: + lib_make_state_repr = None + lib_transition_histogram = None class BehaviorModel: def __init__(self, src_dir: str, loader_cls=Loader): diff --git a/sim/rl/environment.py b/sim/rl/environment.py index fd725f8..d9ccbcb 100644 --- a/sim/rl/environment.py +++ b/sim/rl/environment.py @@ -1,7 +1,5 @@ -from sys import intern import gymnasium as gym from gymnasium import spaces -from matplotlib import interactive import numpy as np from dataclasses import dataclass import pandas as pd @@ -15,7 +13,7 @@ class BusinessLogicConstraints(): max_price_adjustment: float = 0.30 system_max_price: float = 500.0 system_min_price: float = 1.0 - product_catelogue_size: int = 100 + product_catalogue_size: int = 100 episode_length: int = 200 sessions_per_step: int = 250 agent_share: float = 0.25 @@ -37,17 +35,42 @@ class BusinessLogicConstraints(): def _sigmoid(x: np.ndarray) -> np.ndarray: return 1.0 / (1.0 + np.exp(-x)) +class BehavioralProfile: + """simple markov chain model for generating synthetic interaction events""" + def __init__(self, actor: str, purchase_probs: np.ndarray): + self.actor = actor + self.purchase_probs = purchase_probs + self.states = ['view', 'cart', 'checkout'] + # transition matrix: view->cart 0.3, view->view 0.6, view->exit 0.1, cart->checkout 0.5, cart->view 0.4, cart->exit 0.1 + self.trans = {'view': {'view': 0.6, 'cart': 0.3, 'exit': 0.1}, 'cart': {'checkout': 0.5, 'view': 0.4, 'exit': 0.1}, 'checkout': {'exit': 1.0}} + if actor == 'agents': # agents browse more before purchasing + self.trans['view'] = {'view': 0.75, 'cart': 0.15, 'exit': 0.1} + self.trans['cart'] = {'checkout': 0.3, 'view': 0.6, 'exit': 0.1} + + def sample(self, rng: np.random.Generator) -> Dict[str, Any]: + """sample single interaction event""" + 
product_idx = rng.integers(0, len(self.purchase_probs)) + state = 'view' # always start with view + # pick next state based on transition probs + trans = self.trans.get(state, {'exit': 1.0}) + next_state = rng.choice(list(trans.keys()), p=list(trans.values())) + price_paid = 0.0 if next_state != 'checkout' else float(rng.uniform(50, 200)) + return {'action': state, 'product_idx': product_idx, 'actor': 'agent' if self.actor == 'agents' else 'human', 't': 0.0, 'price_paid': price_paid} + + +def _load_behavioral_profile(actor: str, demand_forcing: np.ndarray) -> BehavioralProfile: + """returns a behavioral profile for generating synthetic sessions + actor: 'humans' or 'agents' + demand_forcing: per-product purchase probabilities used to weight interactions + """ + return BehavioralProfile(actor, demand_forcing) + + class CommercePlatform: - """ - This is just an extension of the state management for the environment, it does not implement anything dynamic just helps us simulate demand. - """ - def __init__(self, - product_catelogue_size: int, - max_price: float, - min_price: float, - constraints: BusinessLogicConstraints): - self.product_catelogue_size = product_catelogue_size - self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catelogue_size,)) + """state management for the environment, simulates demand""" + def __init__(self, product_catalogue_size: int, max_price: float, min_price: float, constraints: BusinessLogicConstraints): + self.product_catalogue_size = product_catalogue_size + self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catalogue_size,)) self.max_price = max_price self.min_price = min_price self.constraints = constraints @@ -55,27 +78,12 @@ class CommercePlatform: self._rng = np.random.default_rng(constraints.seed) self._last_interaction_df: pd.DataFrame = pd.DataFrame() - def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]: - # ground truth purchase propensities p = 
np.clip(prices, self.min_price, self.max_price) pn = p / self.max_price human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity) agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity) - return { - "human_purchase_prob": np.clip(human_prob, 0.0, 0.95), - "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95) - } - - def _load_behavioral_profile(actor : str, demand_forcing): - """ - This returns a markov chain with average weights which we get from interaction data of our experiments. - This defines transition probabilities between different events: - search -> view_item_price_binN: 0.7 - view_item_price_binN -> add_to_cart: 0.2 - we also must reweight with the demand_forcing vector or purchase probabilities per-product - """ - + return {"human_purchase_prob": np.clip(human_prob, 0.0, 0.95), "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95)} def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame: demand = self.setup_true_demand(base_prices) @@ -162,22 +170,22 @@ class PHANTOMEnv(gym.Env): self.constraints = BusinessLogicConstraints() self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment, - shape=(self.constraints.product_catelogue_size,), dtype=np.float32) + shape=(self.constraints.product_catalogue_size,), dtype=np.float32) self.observation_space = spaces.Dict({ "elasticity": spaces.Dict({ "price": spaces.Box( - low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32), - high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32), + low=np.full((self.constraints.product_catalogue_size,), self.constraints.system_min_price, dtype=np.float32), + high=np.full((self.constraints.product_catalogue_size,), self.constraints.system_max_price, dtype=np.float32), dtype=np.float32), "demand": spaces.Box( - 
low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), - high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32), + low=np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32), + high=np.full((self.constraints.product_catalogue_size,), 1e6, dtype=np.float32), dtype=np.float32), }) # TODO: define more features that we compute from the interaction data }) self.commerce_platform = CommercePlatform( - product_catelogue_size=self.constraints.product_catelogue_size, + product_catalogue_size=self.constraints.product_catalogue_size, max_price=self.constraints.system_max_price, min_price=self.constraints.system_min_price, constraints=self.constraints) @@ -192,12 +200,12 @@ class PHANTOMEnv(gym.Env): self._rng = np.random.default_rng(seed) self.commerce_platform._rng = np.random.default_rng(seed) self.t = 0 - init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32) + init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catalogue_size,)).astype(np.float32) self._prev_prices = init_prices.copy() self.state = { "elasticity": { "price": init_prices, - "demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), + "demand": np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32), } } return self.state, {} @@ -210,38 +218,35 @@ class PHANTOMEnv(gym.Env): self.constraints.system_max_price).astype(np.float32) self.state["elasticity"]["price"] = new_prices - # TODO: use the commerce platform to simulate sessions interactions_df = self.commerce_platform._simulate_sessions(new_prices) result = self.commerce_platform.compute_interaction_features(interactions_df) - # TODO: implement COI computation to use in reward - COI = 0.0 + COI = 0.0 # TODO: implement cost-of-information computation volatility = 0.0 if self._prev_prices is None else \ float(np.mean(np.abs((new_prices - self._prev_prices) / 
(self._prev_prices + 1e-6)))) self._prev_prices = new_prices.copy() - revenue_observed = float(result["revenue_observed"]) - agent_loss = float(result["agent_loss"]) + # extract metrics with safe defaults for incomplete simulation + revenue_observed = float(result.get("revenue_observed", result.get("mean_sale_price", 0.0))) + agent_loss = float(result.get("agent_loss", 0.0)) reward = (revenue_observed - COI - self.constraints.w_agent_loss * agent_loss - self.constraints.w_volatility * volatility - - self.constraints.w_estimation_error - ) + - self.constraints.w_estimation_error) terminated = self.t >= self.constraints.episode_length info = { "t": self.t, "revenue_observed": revenue_observed, - "revenue_oracle": float(result["revenue_oracle"]), + "revenue_oracle": float(result.get("revenue_oracle", revenue_observed)), "agent_loss": agent_loss, "ux_volatility": volatility, - "mean_internal_error": err_mean, - "look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)), - "mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)), - "true_human_purchases_total": float(np.sum(result["true_human_demand"])), - "true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])), + "look_to_book": float(result.get("look_to_book", 0.0)), + "mean_sale_price": float(result.get("mean_sale_price", 0.0)), + "true_human_purchases_total": 0.0, # TODO: track from simulation + "true_agent_purchases_total": 0.0, # TODO: track from simulation } return self.state, float(reward), terminated, False, info @@ -250,46 +255,43 @@ if __name__ == "__main__": import matplotlib.pyplot as plt from collections import defaultdict - runs = {} - for use_defense in (False, True): - env = PHANTOMEnv(use_defense=use_defense) - obs, _ = env.reset(seed=42) - metrics = defaultdict(list) - total_reward = 0.0 - done = False + env = PHANTOMEnv(constraints=BusinessLogicConstraints()) + obs, _ = env.reset(seed=42) + metrics = defaultdict(list) + total_reward 
= 0.0 + done = False - while not done: - action = env.action_space.sample() - obs, reward, done, _, info = env.step(action) - total_reward += reward - p_mean = float(np.mean(obs["elasticity"]["price"])) - q_mean = float(np.mean(obs["elasticity"]["demand"])) - p_std = float(np.std(obs["elasticity"]["price"])) + while not done: + action = env.action_space.sample() + obs, reward, done, _, info = env.step(action) + total_reward += reward + p_mean = float(np.mean(obs["elasticity"]["price"])) + q_mean = float(np.mean(obs["elasticity"]["demand"])) + p_std = float(np.std(obs["elasticity"]["price"])) - metrics['t'].append(info['t']) - metrics['price_mean'].append(p_mean) - metrics['price_std'].append(p_std) - metrics['demand_mean'].append(q_mean) - metrics['revenue_observed'].append(info['revenue_observed']) - metrics['revenue_oracle'].append(info['revenue_oracle']) - metrics['agent_loss'].append(info['agent_loss']) - metrics['ux_volatility'].append(info['ux_volatility']) - metrics['look_to_book'].append(info['look_to_book']) - metrics['reward'].append(reward) - metrics['human_purchases'].append(info['true_human_purchases_total']) - metrics['agent_purchases'].append(info['true_agent_purchases_total']) + metrics['t'].append(info['t']) + metrics['price_mean'].append(p_mean) + metrics['price_std'].append(p_std) + metrics['demand_mean'].append(q_mean) + metrics['revenue_observed'].append(info['revenue_observed']) + metrics['revenue_oracle'].append(info['revenue_oracle']) + metrics['agent_loss'].append(info['agent_loss']) + metrics['ux_volatility'].append(info['ux_volatility']) + metrics['look_to_book'].append(info['look_to_book']) + metrics['reward'].append(reward) + metrics['human_purchases'].append(info['true_human_purchases_total']) + metrics['agent_purchases'].append(info['true_agent_purchases_total']) - if info['t'] % 20 == 0 or done: - print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} " - f"q={q_mean:6.2f} 
rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} " - f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} " - f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}") + if info['t'] % 20 == 0 or done: + print(f"t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} q={q_mean:6.2f} " + f"rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} " + f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} " + f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}") - runs[use_defense] = metrics - print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n") + print(f"total_reward={total_reward:.2f}") fig, axes = plt.subplots(3, 3, figsize=(15, 12)) - fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold') + fig.suptitle('PHANTOM Environment Run', fontsize=14, fontweight='bold') plot_configs = [ ('price_mean', 'Mean Price', 'Price'), @@ -305,13 +307,10 @@ if __name__ == "__main__": for idx, (key, title, ylabel) in enumerate(plot_configs): ax = axes[idx // 3, idx % 3] - for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]: - m = runs[use_defense] - ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5) + ax.plot(metrics['t'], metrics[key], color='blue', alpha=0.7, linewidth=1.5) ax.set_xlabel('Step') ax.set_ylabel(ylabel) ax.set_title(title, fontsize=10, fontweight='bold') - ax.legend(loc='best', fontsize=8) ax.grid(True, alpha=0.3) plt.tight_layout() From 72877439ca8133613f19173eb6b47099d68141dc Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 21 Jan 2026 19:12:56 +0100 Subject: [PATCH 35/35] feat: contaminator and training --- experiments/procesing/contaminator.py | 87 ++++++++++++++++---------- sim/rl/train.py | 89 ++++++++++++++------------- 2 files changed, 100 insertions(+), 76 deletions(-) diff --git a/experiments/procesing/contaminator.py b/experiments/procesing/contaminator.py index da44c3d..2f23b2b 
100644 --- a/experiments/procesing/contaminator.py +++ b/experiments/procesing/contaminator.py @@ -1,45 +1,66 @@ import pandas as pd import random -from sim.rl.behavior_loader import AgentBehaviorModel # TODO: proper import this +import os +from pathlib import Path -base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments" -agent_dir = f"{base_dir}/agents/collected_data/" +# use relative import when in package context, fallback for standalone +try: + from sim.rl.behavior_loader.models import AgentBehaviorModel +except ImportError: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent / "sim" / "rl" / "behavior_loader")) + from models import AgentBehaviorModel + +# paths should be configurable via environment or relative to project root +PROJECT_ROOT = Path(__file__).parent.parent.parent +AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', PROJECT_ROOT / "experiments" / "agents" / "collected_data")) - -def remap_schema(df : pd.DataFrame, mapping: dict, on: str = "event_type"): +def remap_schema(df: pd.DataFrame, mapping: dict, on: str = "event_type") -> pd.DataFrame: + """remap column values according to mapping dict, preserving unmapped values""" df = df.copy() df[on] = df[on].map(mapping).fillna(df[on]) return df -def contaminate_dataset(df : pd.DataFrame, on : str = "event_type", - contamination_rate: float = 0.1) -> pd.DataFrame: - model = AgentBehaviorModel(agent_dir) - target_df_schema = df[on].unique().tolist() - mapping = { - 'view': 'view_page' - # TODO: define properly for the given dataset - } - # think about replacing with freqdist method from library - OG_event_distribution = df[on].value_counts(normalize=True).to_dict() - # normalize to weights - OG_event_distribution = {k: v / sum(OG_event_distribution.values()) for k, v in OG_event_distribution.items()} - mapped_df = remap_schema(df, mapping, on=on) - N = len(df) - N_final = N / (1 - contamination_rate) # TODO: explain this in paper - N_contaminate = int(N_final 
- N) - start_event_types = random.choices(list(OG_event_distribution.keys()), - weights=list(OG_event_distribution.values()), k=N_contaminate) - # it makes sense - new_trajectories = [] - for start_event in start_event_types: - # sample from og start - start = None # TODO: defin start accoding to dataset (randomly sample with weights of event distr) - trajectory = model.sample_trajectory(start) # TODO: explain this method in paper - new_trajectories.extend(trajectory) +def contaminate_dataset(df: pd.DataFrame, on: str = "event_type", + contamination_rate: float = 0.1, + agent_data_dir: Path = None) -> pd.DataFrame: + """inject synthetic agent trajectories into a dataset + contamination_rate: fraction of final dataset that should be agent data (0.1 = 10% agents) + """ + data_dir = agent_data_dir or AGENT_DATA_DIR + model = AgentBehaviorModel(str(data_dir)) + model.build_MDP() # ensure MDP is built before sampling - # TODO: make sure the new trajctories schema conforms with dataset - contaminate_df = pd.DataFrame(new_trajectories) - df = pd.concat([df, contaminate_df], ignore_index=True) + # compute event distribution from original data + event_dist = df[on].value_counts(normalize=True).to_dict() + total = sum(event_dist.values()) + event_dist = {k: v / total for k, v in event_dist.items()} + + # calculate how many synthetic events to add + N = len(df) + N_final = N / (1 - contamination_rate) + N_contaminate = int(N_final - N) + + # sample start states weighted by original distribution + start_events = random.choices(list(event_dist.keys()), weights=list(event_dist.values()), k=N_contaminate) + + # generate synthetic trajectories + new_rows = [] + for start_event in start_events: + # sample trajectory from agent model, using a state that contains the event type + mdp_states = model.mdp.get('states', []) if model.mdp else [] + matching_starts = [s for s in mdp_states if start_event in s] + if not matching_starts: + continue # skip if no matching start state + 
start_state = random.choice(matching_starts) + trajectory = model.sample_traj(start_state, max_len=20) + for state in trajectory: + parts = state.split('|') # page|productId|eventName format + new_rows.append({on: parts[-1] if parts else start_event, 'source': 'synthetic_agent'}) + + if new_rows: + contaminate_df = pd.DataFrame(new_rows) + df = pd.concat([df, contaminate_df], ignore_index=True) return df diff --git a/sim/rl/train.py b/sim/rl/train.py index ba257de..01e6809 100644 --- a/sim/rl/train.py +++ b/sim/rl/train.py @@ -3,15 +3,17 @@ import logging from pathlib import Path from typing import Dict, Type, Optional import pickle -from torch import neg_ from torch.utils.tensorboard import SummaryWriter -from environment import PHANTOMEnv, FastTrainingConstraints, BusinessLogicConstraints -from engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine, - SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine) +from environment import PHANTOMEnv, BusinessLogicConstraints logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') logger = logging.getLogger(__name__) +try: + from engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine, + SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine) +except ImportError: + BasePricingEngine = None # engines not required for basic usage """ @@ -26,8 +28,7 @@ CURRENT SOLUTION BELOW does not implement correct learning or updates. 
class EngineTrainer: """wrapper to run pricing engines through episodes and collect metrics""" - def __init__(self, engine: BasePricingEngine, env: PHANTOMEnv, - tb_writer: Optional[SummaryWriter] = None): + def __init__(self, engine, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None): self.engine = engine self.env = env self.episode_metrics = [] @@ -35,7 +36,6 @@ class EngineTrainer: self.global_step = 0 def train(self, n_episodes: int, seed: int = 42): - obs, _ = self.env.reset(seed=seed) prices = None for ep in range(n_episodes): @@ -44,12 +44,21 @@ class EngineTrainer: self.engine.update(obs, reward, done, info) return self - - - - - - return self.episode_metrics + def run_episode(self, seed: int = 42) -> Dict: + """run single evaluation episode and return metrics""" + obs, _ = self.env.reset(seed=seed) + self.engine.reset() + total_reward, prices = 0.0, None + ep_metrics = {'total_reward': 0.0} + done = False + while not done: + prices = self.engine.compute_prices(prices, obs) if prices is not None else obs["elasticity"]["price"] + obs, reward, done, _, info = self.env.step(prices) + total_reward += reward + for k, v in info.items(): + ep_metrics[k] = v + ep_metrics['total_reward'] = total_reward + return ep_metrics def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict: """evaluate trained engine""" @@ -57,17 +66,16 @@ class EngineTrainer: 'agent_loss', 'ux_volatility', 'look_to_book']} for ep in range(n_episodes): metrics = self.run_episode(seed=seed + ep) - for k in results: results[k].append(metrics[k]) + for k in results: + results[k].append(metrics.get(k, 0.0)) return {k: (np.mean(v), np.std(v)) for k, v in results.items()} -def make_env(fast: bool = True): - constraints = FastTrainingConstraints() if fast else BusinessLogicConstraints() - return PHANTOMEnv(constraints=constraints) +def make_env(): + return PHANTOMEnv(constraints=BusinessLogicConstraints()) -def train_engine(engine_cls: Type[BasePricingEngine], env: PHANTOMEnv, - 
n_episodes: int, seed: int = 42, +def train_engine(engine_cls, env: PHANTOMEnv, n_episodes: int, seed: int = 42, tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer: constraints = env.constraints engine = engine_cls(constraints=constraints, seed=seed) @@ -80,15 +88,11 @@ def save_trainer(trainer: EngineTrainer, path: Path): """save engine state and metrics""" path.parent.mkdir(parents=True, exist_ok=True) with open(path, 'wb') as f: - pickle.dump({ - 'engine': trainer.engine, - 'metrics': trainer.episode_metrics - }, f) + pickle.dump({'engine': trainer.engine, 'metrics': trainer.episode_metrics}, f) logger.info(f"Saved trainer to {path}") -def load_trainer(path: Path, env: PHANTOMEnv, - tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer: +def load_trainer(path: Path, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer: """load saved engine""" with open(path, 'rb') as f: data = pickle.load(f) @@ -98,45 +102,44 @@ def load_trainer(path: Path, env: PHANTOMEnv, if __name__ == "__main__": + if BasePricingEngine is None: + logger.error("Engines not available, cannot run training") + exit(1) + base_dir = Path("./runs") base_dir.mkdir(exist_ok=True) engines = { "Wild": WildPricingEngine, "Static": StaticPricingEngine, -# "SimpleDemand": SimpleDemandEngine, "RandomWalk": RandomWalkEngine, "ThompsonSampling": ThompsonSamplingEngine, } - defenses = [False, True] n_train_episodes = 50 n_eval_episodes = 10 seed = 42 - fast_mode = True - logger.info(f"Training config: {n_train_episodes} episodes per engine, fast_mode={fast_mode}") + logger.info(f"Training config: {n_train_episodes} episodes per engine") trained_trainers = {} for engine_name, engine_cls in engines.items(): - for use_defense in defenses: - defense_label = "defense_on" if use_defense else "defense_off" - run_name = f"{engine_name}_{defense_label}" - log_dir = base_dir / run_name - log_dir.mkdir(parents=True, exist_ok=True) + run_name = engine_name + log_dir = base_dir / 
run_name + log_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Training {engine_name} with defense={use_defense}") - logger.info(f"Log directory: {log_dir}") + logger.info(f"Training {engine_name}") + logger.info(f"Log directory: {log_dir}") - env = make_env(fast=fast_mode) - tb_writer = SummaryWriter(log_dir=str(log_dir)) - trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer) - tb_writer.close() + env = make_env() + tb_writer = SummaryWriter(log_dir=str(log_dir)) + trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer) + tb_writer.close() - save_path = log_dir / "trainer.pkl" - save_trainer(trainer, save_path) + save_path = log_dir / "trainer.pkl" + save_trainer(trainer, save_path) - trained_trainers[run_name] = (trainer, env) + trained_trainers[run_name] = (trainer, env) logger.info("Starting evaluation")