diff --git a/experiments/agents/agent.py b/experiments/agents/agent.py index eb54bcb..4619c27 100644 --- a/experiments/agents/agent.py +++ b/experiments/agents/agent.py @@ -38,7 +38,7 @@ def get_agent(agent_type: AgentTypes, **kwargs) -> Agent: if __name__ == "__main__": import asyncio - JTBD= "Name of the company of this website" - agent = get_agent(AgentTypes.GENERIC_BROWSER_USE_AGENT, goal=JTBD, url="https://ie.edu", timeout=300) + JTBD= "Name all the products on this site and try to find out more about each product by clicking into them (they might not open)" + agent = get_agent(AgentTypes.GENERIC_BROWSER_USE_AGENT, goal=JTBD, url="http://localhost:3000/products", timeout=300) R=asyncio.run(agent.act()) print(R) diff --git a/experiments/data_export.ipynb b/experiments/data_export.ipynb index 4ba73fb..4f4c2af 100644 --- a/experiments/data_export.ipynb +++ b/experiments/data_export.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": 51, "id": "62eafcd9-5462-4063-8873-0e7fb9add907", "metadata": {}, "outputs": [ @@ -12,7 +12,7 @@ "True" ] }, - "execution_count": 9, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 52, "id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f", "metadata": {}, "outputs": [ @@ -40,31 +40,25 @@ "output_type": "stream", "text": [ "\n", - "RangeIndex: 528 entries, 0 to 527\n", - "Data columns (total 19 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 sessionId 528 non-null object \n", - " 1 eventType 467 non-null object \n", - " 2 ts 528 non-null object \n", - " 3 targetEl 401 non-null object \n", - " 4 eventName 61 non-null object \n", - " 5 page 61 non-null object \n", - " 6 storeMode 61 non-null object \n", - " 7 userAgent 61 non-null object \n", - " 8 productId 21 non-null object \n", - " 9 metadata_path 467 non-null object \n", - " 10 metadata_referrer 82 non-null object \n", - " 11 metadata_x 425 non-null float64\n", - " 12 metadata_y 425 non-null float64\n", - " 13 metadata_event 7 non-null object \n", - " 14 metadata_targetEl 24 non-null object \n", - " 15 metadata_roomType 5 non-null object \n", - " 16 metadata_price 5 non-null float64\n", - " 17 metadata_nights 5 non-null float64\n", - " 18 metadata_targetUrl 4 non-null object \n", - "dtypes: float64(4), object(15)\n", - "memory usage: 78.5+ KB\n" + "RangeIndex: 73 entries, 0 to 72\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sessionId 73 non-null object \n", + " 1 eventName 73 non-null object \n", + " 2 page 73 non-null object \n", + " 3 productId 67 non-null object \n", + " 4 storeMode 73 non-null object \n", + " 5 userAgent 73 non-null object \n", + " 6 ts 73 non-null object \n", + " 7 metadata_referrer 6 non-null object \n", + " 8 metadata_roomType 45 non-null object \n", + " 9 metadata_price 45 non-null float64\n", + " 10 metadata_nights 45 non-null float64\n", + " 11 metadata_elementText 22 non-null object \n", + " 12 metadata_dwellTime 22 non-null float64\n", + "dtypes: float64(3), object(10)\n", + "memory usage: 7.5+ KB\n" ] } ], @@ -75,7 +69,7 @@ " topic, \n", " enable_auto_commit=True,\n", " value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n", - " auto_offset_reset='earliest',\n", + " auto_offset_reset='earliest', \n", " bootstrap_servers=['localhost:9092'])\n", "messages=consumer.poll(timeout_ms=1000,max_records=10000)\n", "df = []\n", @@ -90,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 53, "id": "f6819a1c-32ab-49c7-845b-5df7bf60f561", "metadata": {}, "outputs": [ @@ -116,65 +110,47 @@ " \n", " \n", " sessionId\n", - " eventType\n", - " ts\n", - " targetEl\n", " eventName\n", " page\n", + " productId\n", " storeMode\n", " userAgent\n", - " productId\n", - " metadata_path\n", + " ts\n", " metadata_referrer\n", - " metadata_x\n", - " metadata_y\n", - " metadata_event\n", - " metadata_targetEl\n", " metadata_roomType\n", " metadata_price\n", " metadata_nights\n", - " metadata_targetUrl\n", + " metadata_elementText\n", + " metadata_dwellTime\n", " \n", " \n", " \n", " \n", " 0\n", - " 1762434923440-66hdhq8qicd\n", - " pageview\n", - " 1762434924107\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", + " d176d7c9-4027-4702-9e31-2a71395cdda0\n", + " page_view\n", + " /products\n", + " None\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...\n", + " 2025-11-14T13:23:46.270Z\n", " \n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", " \n", " \n", " 1\n", - " 1762434923440-66hdhq8qicd\n", - " click\n", - " 1762434925198\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " f0317a5d-e424-44e9-b784-c8f7291ffe31\n", + " page_view\n", " /\n", - " NaN\n", - " 1098.0\n", - " 663.0\n", - " NaN\n", + " None\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...\n", + " 2025-11-14T13:26:00.291Z\n", + " \n", " NaN\n", " NaN\n", " NaN\n", @@ -183,20 +159,14 @@ " \n", " \n", " 2\n", - " 1762434923440-66hdhq8qicd\n", - " click\n", - " 1762434925371\n", - " MAIN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 1098.0\n", - " 663.0\n", - " NaN\n", + " f0317a5d-e424-44e9-b784-c8f7291ffe31\n", + " page_view\n", + " /products\n", + " None\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...\n", + " 2025-11-14T13:26:07.769Z\n", + " \n", " NaN\n", " NaN\n", " NaN\n", @@ -205,549 +175,251 @@ " \n", " \n", " 3\n", - " 1762434923440-66hdhq8qicd\n", - " pageview\n", - " 1762437192910\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " \n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " f0317a5d-e424-44e9-b784-c8f7291ffe31\n", + " view_item_page\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...\n", + " 2025-11-14T13:26:15.010Z\n", " NaN\n", + " Premium Room\n", + " 269.0\n", + " 1.0\n", " NaN\n", " NaN\n", " \n", " \n", " 4\n", - " 1762434923440-66hdhq8qicd\n", - " pageview\n", - " 1762437198539\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", + " 238dc588-a7ab-4c0e-bccd-6abca5076c66\n", + " page_view\n", + " /products\n", + " None\n", + " hotel\n", + " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...\n", + " 2025-11-14T13:27:15.457Z\n", " \n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " \n", + " \n", + " 5\n", + " 238dc588-a7ab-4c0e-bccd-6abca5076c66\n", + " view_item_page\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...\n", + " 2025-11-14T13:27:15.591Z\n", " NaN\n", + " Premium Room\n", + " 264.0\n", + " 2.0\n", " NaN\n", " NaN\n", " \n", " \n", - " 390\n", - " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", - " click\n", - " 1762443115648\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 245.0\n", - " 595.0\n", - " NaN\n", - " NaN\n", - " NaN\n", + " 6\n", + " 238dc588-a7ab-4c0e-bccd-6abca5076c66\n", + " view_item_page\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...\n", + " 2025-11-14T13:27:21.483Z\n", " NaN\n", + " Premium Room\n", + " 264.0\n", + " 2.0\n", " NaN\n", " NaN\n", " \n", " \n", - " 391\n", - " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", - " click\n", - " 1762443174606\n", - " DIV\n", + " 7\n", + " 238dc588-a7ab-4c0e-bccd-6abca5076c66\n", + " hover_over_title\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...\n", + " 2025-11-14T13:27:22.646Z\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " Grand Plaza Hotel\n", + " 1200.0\n", + " \n", + " \n", + " 8\n", + " 238dc588-a7ab-4c0e-bccd-6abca5076c66\n", + " view_item_page\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...\n", + " 2025-11-14T13:27:25.889Z\n", " NaN\n", - " /\n", - " NaN\n", - " 475.0\n", - " 428.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " Premium Room\n", + " 264.0\n", + " 2.0\n", " NaN\n", " NaN\n", " \n", " \n", - " 392\n", - " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", - " click\n", - " 1762443183406\n", - " INPUT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 832.0\n", - " 219.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 393\n", - " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", - " click\n", - " 1762443208588\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 485.0\n", - " 155.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 394\n", - " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", - " click\n", - " 1762443225474\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 281.0\n", - " 281.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 407\n", - " 1762444018243-0120z6z5u42f\n", - " pageview\n", - " 1762444018256\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", + " 35\n", + " 013fc334-4045-4d5a-8739-dd0a8766a63b\n", + " page_view\n", + " /products\n", + " None\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...\n", + " 2025-11-14T13:53:59.993Z\n", " \n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " \n", + " \n", + " 36\n", + " 013fc334-4045-4d5a-8739-dd0a8766a63b\n", + " view_item_page\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...\n", + " 2025-11-14T13:54:10.705Z\n", " NaN\n", + " Premium Room\n", + " 223.0\n", + " 3.0\n", " NaN\n", " NaN\n", " \n", " \n", - " 408\n", - " 1762444018243-0120z6z5u42f\n", - " click\n", - " 1762445774344\n", - " DIV\n", + " 37\n", + " 013fc334-4045-4d5a-8739-dd0a8766a63b\n", + " hover_over_title\n", + " /products\n", + " htl-0\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...\n", + " 2025-11-14T13:54:11.771Z\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " Grand Plaza Hotel\n", + " 1200.0\n", + " \n", + " \n", + " 38\n", + " 013fc334-4045-4d5a-8739-dd0a8766a63b\n", + " view_item_page\n", + " /products\n", + " htl-1\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...\n", + " 2025-11-14T13:54:29.772Z\n", " NaN\n", - " /\n", - " NaN\n", - " 299.0\n", - " 214.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " Standard Room\n", + " 267.0\n", + " 5.0\n", " NaN\n", " NaN\n", " \n", " \n", - " 431\n", - " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", - " pageview\n", - " 1762448190973\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " \n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 432\n", - " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", - " click\n", - " 1762448192425\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 1623.0\n", - " 493.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 433\n", - " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", - " click\n", - " 1762448192645\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 1623.0\n", - " 493.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 434\n", - " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", - " pageview\n", - " 1762448205850\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " \n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 435\n", - " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", - " click\n", - " 1762448207922\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 421.0\n", - " 216.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 438\n", - " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", - " pageview\n", - " 1762448283244\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " \n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 439\n", - " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", - " click\n", - " 1762448295524\n", - " HTML\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 614.0\n", - " 720.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 440\n", - " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", - " click\n", - " 1762448342763\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 416.0\n", - " 397.0\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 441\n", - " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", - " pageview\n", - " 1762448343396\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " \n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 442\n", - " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", - " click\n", - " 1762448829631\n", - " DIV\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " /\n", - " NaN\n", - " 45.0\n", - " 44.0\n", - " NaN\n", - " NaN\n", + " 39\n", + " 013fc334-4045-4d5a-8739-dd0a8766a63b\n", + " hover_over_title\n", + " /products\n", + " htl-1\n", + " hotel\n", + " Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...\n", + " 2025-11-14T13:54:30.833Z\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " Seaside Resort\n", + " 1200.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " sessionId eventType ts targetEl \\\n", - "0 1762434923440-66hdhq8qicd pageview 1762434924107 NaN \n", - "1 1762434923440-66hdhq8qicd click 1762434925198 DIV \n", - "2 1762434923440-66hdhq8qicd click 1762434925371 MAIN \n", - "3 1762434923440-66hdhq8qicd pageview 1762437192910 NaN \n", - "4 1762434923440-66hdhq8qicd pageview 1762437198539 NaN \n", - "390 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443115648 DIV \n", - "391 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443174606 DIV \n", - "392 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443183406 INPUT \n", - "393 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443208588 DIV \n", - "394 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443225474 DIV \n", - "407 1762444018243-0120z6z5u42f pageview 1762444018256 NaN \n", - "408 1762444018243-0120z6z5u42f click 1762445774344 DIV \n", - "431 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448190973 NaN \n", - "432 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192425 DIV \n", - "433 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192645 DIV \n", - "434 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448205850 NaN \n", - "435 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448207922 DIV \n", - "438 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448283244 NaN \n", - "439 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448295524 HTML \n", - "440 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448342763 DIV \n", - "441 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448343396 NaN \n", - "442 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448829631 DIV \n", + " sessionId eventName page \\\n", + "0 d176d7c9-4027-4702-9e31-2a71395cdda0 page_view /products \n", + "1 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view / \n", + "2 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view /products \n", + "3 f0317a5d-e424-44e9-b784-c8f7291ffe31 view_item_page /products \n", + "4 238dc588-a7ab-4c0e-bccd-6abca5076c66 page_view /products \n", + "5 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", + "6 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", + "7 238dc588-a7ab-4c0e-bccd-6abca5076c66 hover_over_title /products \n", + "8 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", + "35 013fc334-4045-4d5a-8739-dd0a8766a63b page_view /products \n", + "36 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n", + "37 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n", + "38 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n", + "39 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n", "\n", - " eventName page storeMode userAgent productId metadata_path \\\n", - "0 NaN NaN NaN NaN NaN / \n", - "1 NaN NaN NaN NaN NaN / \n", - "2 NaN NaN NaN NaN NaN / \n", - "3 NaN NaN NaN NaN NaN / \n", - "4 NaN NaN NaN NaN NaN / \n", - "390 NaN NaN NaN NaN NaN / \n", - "391 NaN NaN NaN NaN NaN / \n", - "392 NaN NaN NaN NaN NaN / \n", - "393 NaN NaN NaN NaN NaN / \n", - "394 NaN NaN NaN NaN NaN / \n", - "407 NaN NaN NaN NaN NaN / \n", - "408 NaN NaN NaN NaN NaN / \n", - "431 NaN NaN NaN NaN NaN / \n", - "432 NaN NaN NaN NaN NaN / \n", - "433 NaN NaN NaN NaN NaN / \n", - "434 NaN NaN NaN NaN NaN / \n", - "435 NaN NaN NaN NaN NaN / \n", - "438 NaN NaN NaN NaN NaN / \n", - "439 NaN NaN NaN NaN NaN / \n", - "440 NaN NaN NaN NaN NaN / \n", - "441 NaN NaN NaN NaN NaN / \n", - "442 NaN NaN NaN NaN NaN / \n", + " productId storeMode userAgent \\\n", + "0 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "1 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", + "2 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", + "3 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", + "4 None hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "5 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "6 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "7 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "8 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", + "35 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "36 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "37 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "38 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", + "39 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", "\n", - " metadata_referrer metadata_x metadata_y metadata_event \\\n", - "0 NaN NaN NaN \n", - "1 NaN 1098.0 663.0 NaN \n", - "2 NaN 1098.0 663.0 NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "390 NaN 245.0 595.0 NaN \n", - "391 NaN 475.0 428.0 NaN \n", - "392 NaN 832.0 219.0 NaN \n", - "393 NaN 485.0 155.0 NaN \n", - "394 NaN 281.0 281.0 NaN \n", - "407 NaN NaN NaN \n", - "408 NaN 299.0 214.0 NaN \n", - "431 NaN NaN NaN \n", - "432 NaN 1623.0 493.0 NaN \n", - "433 NaN 1623.0 493.0 NaN \n", - "434 NaN NaN NaN \n", - "435 NaN 421.0 216.0 NaN \n", - "438 NaN NaN NaN \n", - "439 NaN 614.0 720.0 NaN \n", - "440 NaN 416.0 397.0 NaN \n", - "441 NaN NaN NaN \n", - "442 NaN 45.0 44.0 NaN \n", + " ts metadata_referrer metadata_roomType \\\n", + "0 2025-11-14T13:23:46.270Z NaN \n", + "1 2025-11-14T13:26:00.291Z NaN \n", + "2 2025-11-14T13:26:07.769Z NaN \n", + "3 2025-11-14T13:26:15.010Z NaN Premium Room \n", + "4 2025-11-14T13:27:15.457Z NaN \n", + "5 2025-11-14T13:27:15.591Z NaN Premium Room \n", + "6 2025-11-14T13:27:21.483Z NaN Premium Room \n", + "7 2025-11-14T13:27:22.646Z NaN NaN \n", + "8 2025-11-14T13:27:25.889Z NaN Premium Room \n", + "35 2025-11-14T13:53:59.993Z NaN \n", + "36 2025-11-14T13:54:10.705Z NaN Premium Room \n", + "37 2025-11-14T13:54:11.771Z NaN NaN \n", + "38 2025-11-14T13:54:29.772Z NaN Standard Room \n", + "39 2025-11-14T13:54:30.833Z NaN NaN \n", "\n", - " metadata_targetEl metadata_roomType metadata_price metadata_nights \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", - "390 NaN NaN NaN NaN \n", - "391 NaN NaN NaN NaN \n", - "392 NaN NaN NaN NaN \n", - "393 NaN NaN NaN NaN \n", - "394 NaN NaN NaN NaN \n", - "407 NaN NaN NaN NaN \n", - "408 NaN NaN NaN NaN \n", - "431 NaN NaN NaN NaN \n", - "432 NaN NaN NaN NaN \n", - "433 NaN NaN NaN NaN \n", - "434 NaN NaN NaN NaN \n", - "435 NaN NaN NaN NaN \n", - "438 NaN NaN NaN NaN \n", - "439 NaN NaN NaN NaN \n", - "440 NaN NaN NaN NaN \n", - "441 NaN NaN NaN NaN \n", - "442 NaN NaN NaN NaN \n", - "\n", - " metadata_targetUrl \n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "390 NaN \n", - "391 NaN \n", - "392 NaN \n", - "393 NaN \n", - "394 NaN \n", - "407 NaN \n", - "408 NaN \n", - "431 NaN \n", - "432 NaN \n", - "433 NaN \n", - "434 NaN \n", - "435 NaN \n", - "438 NaN \n", - "439 NaN \n", - "440 NaN \n", - "441 NaN \n", - "442 NaN " + " metadata_price metadata_nights metadata_elementText metadata_dwellTime \n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 269.0 1.0 NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "5 264.0 2.0 NaN NaN \n", + "6 264.0 2.0 NaN NaN \n", + "7 NaN NaN Grand Plaza Hotel 1200.0 \n", + "8 264.0 2.0 NaN NaN \n", + "35 NaN NaN NaN NaN \n", + "36 223.0 3.0 NaN NaN \n", + "37 NaN NaN Grand Plaza Hotel 1200.0 \n", + "38 267.0 5.0 NaN NaN \n", + "39 NaN NaN Seaside Resort 1200.0 " ] }, - "execution_count": 11, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -758,32 +430,31 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 54, "id": "380eca5f-8304-4fb2-be32-e8bcfd312085", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['214d9fad-9b00-40c3-bd0e-7739b6acd654',\n", - " '1762444018243-0120z6z5u42f',\n", - " 'f0d40ca6-c1d3-4ecd-beb3-796adc74349d',\n", - " 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472',\n", - " '1762434923440-66hdhq8qicd']" + "['238dc588-a7ab-4c0e-bccd-6abca5076c66',\n", + " 'f0317a5d-e424-44e9-b784-c8f7291ffe31',\n", + " 'd176d7c9-4027-4702-9e31-2a71395cdda0',\n", + " '013fc334-4045-4d5a-8739-dd0a8766a63b']" ] }, - "execution_count": 12, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sessions = list(set(df['sessionId'])); sessions" + "sessions = list(set(df['sessionId'])); sessions # 238dc588-a7ab-4c0e-bccd-6abca5076c66" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 55, "id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1", "metadata": {}, "outputs": [], @@ -793,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 56, "id": "050d90a4-20a9-47f5-b998-c31178a54cb3", "metadata": {}, "outputs": [], @@ -814,7 +485,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 57, "id": "e68f9004-82f5-4826-aece-e3dc6e15a18f", "metadata": {}, "outputs": [], @@ -876,38 +547,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 58, "id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc", "metadata": {}, "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", "text": [ - "[]\n" + "238dc588-a7ab-4c0e-bccd-6abca5076c66\n" ] }, { @@ -919,169 +567,148 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[]\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[]\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "page_view\n", - "\n", - "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", + "\n", + "view_item_page\n", + "\n", + "view_item_page\n", + "\n", + "\n", + "\n", + "page_view->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "view_item_page->view_item_page\n", + "\n", + "\n", + "0.19\n", + "\n", + "\n", + "\n", + "hover_over_title\n", + "\n", + "hover_over_title\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_title\n", + "\n", + "\n", + "0.38\n", + "\n", + "\n", + "\n", + "hover_over_paragraph\n", + "\n", + "hover_over_paragraph\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_paragraph\n", + "\n", + "\n", + "0.44\n", + "\n", + "\n", + "\n", + "hover_over_title->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "hover_over_paragraph->page_view\n", + "\n", + "\n", + "0.14\n", + "\n", + "\n", + "\n", + "hover_over_paragraph->view_item_page\n", + "\n", + "\n", + "0.86\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 1. 0. 0. ]\n", + " [0. 0.1875 0.375 0.4375 ]\n", + " [0. 1. 0. 0. ]\n", + " [0.14285714 0.85714286 0. 0. ]]\n", + "f0317a5d-e424-44e9-b784-c8f7291ffe31\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", "\n", "\n", "\n", "page_view->page_view\n", - "\n", - "\n", - "0.70\n", + "\n", + "\n", + "0.50\n", "\n", - "\n", + "\n", "\n", - "click\n", - "\n", - "click\n", + "view_item_page\n", + "\n", + "view_item_page\n", "\n", - "\n", + "\n", "\n", - "page_view->click\n", - "\n", - "\n", - "0.17\n", - "\n", - "\n", - "\n", - "product_hover\n", - "\n", - "product_hover\n", - "\n", - "\n", - "\n", - "page_view->product_hover\n", - "\n", - "\n", - "0.13\n", - "\n", - "\n", - "\n", - "click->page_view\n", - "\n", - "\n", - "0.35\n", - "\n", - "\n", - "\n", - "click->click\n", - "\n", - "\n", - "0.41\n", - "\n", - "\n", - "\n", - "click->product_hover\n", - "\n", - "\n", - "0.24\n", - "\n", - "\n", - "\n", - "product_hover->click\n", - "\n", - "\n", - "0.07\n", - "\n", - "\n", - "\n", - "product_hover->product_hover\n", - "\n", - "\n", - "0.60\n", - "\n", - "\n", - "\n", - "product_view\n", - "\n", - "product_view\n", - "\n", - "\n", - "\n", - "product_hover->product_view\n", - "\n", - "\n", - "0.33\n", - "\n", - "\n", - "\n", - "product_view->click\n", - "\n", - "\n", - "1.00\n", + "page_view->view_item_page\n", + "\n", + "\n", + "0.50\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1091,10 +718,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[0.69565217 0.17391304 0.13043478 0. ]\n", - " [0.35294118 0.41176471 0.23529412 0. ]\n", - " [0. 0.06666667 0.6 0.33333333]\n", - " [0. 1. 0. 0. ]]\n" + "[[5.0e-001 5.0e-001]\n", + " [9.9e-324 1.5e-323]]\n", + "d176d7c9-4027-4702-9e31-2a71395cdda0\n" ] }, { @@ -1106,15 +732,21 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1124,13 +756,121 @@ "name": "stdout", "output_type": "stream", "text": [ - "[]\n" + "[[0.]]\n", + "013fc334-4045-4d5a-8739-dd0a8766a63b\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", + "\n", + "view_item_page\n", + "\n", + "view_item_page\n", + "\n", + "\n", + "\n", + "page_view->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "view_item_page->view_item_page\n", + "\n", + "\n", + "0.68\n", + "\n", + "\n", + "\n", + "hover_over_title\n", + "\n", + "hover_over_title\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_title\n", + "\n", + "\n", + "0.29\n", + "\n", + "\n", + "\n", + "hover_over_paragraph\n", + "\n", + "hover_over_paragraph\n", + "\n", + "\n", + "\n", + "view_item_page->hover_over_paragraph\n", + "\n", + "\n", + "0.04\n", + "\n", + "\n", + "\n", + "hover_over_title->view_item_page\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "hover_over_paragraph->page_view\n", + "\n", + "\n", + "0.14\n", + "\n", + "\n", + "\n", + "hover_over_paragraph->view_item_page\n", + "\n", + "\n", + "0.86\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 1. 0. 0. ]\n", + " [0. 0.67857143 0.28571429 0.03571429]\n", + " [0. 1. 0. 0. ]\n", + " [0.14285714 0.85714286 0. 0. ]]\n" ] } ], "source": [ "def explore_session(session_id: str):\n", " subset = df[df['sessionId'] == session_id]\n", + " print(session_id)\n", " P, labels = build_transition_prob_matrix(subset)\n", " g = render_graph(f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n", " display(g)\n",