diff --git a/experiments/data_export.ipynb b/experiments/data_export.ipynb index c0620ff..4ba73fb 100644 --- a/experiments/data_export.ipynb +++ b/experiments/data_export.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 98, + "execution_count": 9, "id": "62eafcd9-5462-4063-8873-0e7fb9add907", "metadata": {}, "outputs": [ @@ -12,7 +12,7 @@ "True" ] }, - "execution_count": 98, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 10, "id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f", "metadata": {}, "outputs": [ @@ -40,22 +40,31 @@ "output_type": "stream", "text": [ "\n", - "RangeIndex: 141 entries, 0 to 140\n", - "Data columns (total 10 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 sessionId 141 non-null object \n", - " 1 eventType 141 non-null object \n", - " 2 ts 141 non-null int64 \n", - " 3 targetEl 14 non-null object \n", - " 4 targetUrl 1 non-null object \n", - " 5 metadata_path 141 non-null object \n", - " 6 metadata_referrer 6 non-null object \n", - " 7 metadata_x 14 non-null float64\n", - " 8 metadata_y 14 non-null float64\n", - " 9 metadata_scrollY 121 non-null float64\n", - "dtypes: float64(3), int64(1), object(6)\n", - "memory usage: 11.1+ KB\n" + "RangeIndex: 528 entries, 0 to 527\n", + "Data columns (total 19 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sessionId 528 non-null object \n", + " 1 eventType 467 non-null object \n", + " 2 ts 528 non-null object \n", + " 3 targetEl 401 non-null object \n", + " 4 eventName 61 non-null object \n", + " 5 page 61 non-null object \n", + " 6 storeMode 61 non-null object \n", + " 7 userAgent 61 non-null object \n", + " 8 productId 21 non-null object \n", + " 9 metadata_path 467 non-null object \n", + " 10 metadata_referrer 82 non-null object \n", + " 11 metadata_x 425 non-null float64\n", + " 12 metadata_y 425 non-null float64\n", + " 13 metadata_event 7 non-null object \n", + " 14 metadata_targetEl 24 non-null object \n", + " 15 metadata_roomType 5 non-null object \n", + " 16 metadata_price 5 non-null float64\n", + " 17 metadata_nights 5 non-null float64\n", + " 18 metadata_targetUrl 4 non-null object \n", + "dtypes: float64(4), object(15)\n", + "memory usage: 78.5+ KB\n" ] } ], @@ -81,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 11, "id": "f6819a1c-32ab-49c7-845b-5df7bf60f561", "metadata": {}, "outputs": [ @@ -110,20 +119,33 @@ " eventType\n", " ts\n", " targetEl\n", - " targetUrl\n", + " eventName\n", + " page\n", + " storeMode\n", + " userAgent\n", + " productId\n", " metadata_path\n", " metadata_referrer\n", " metadata_x\n", " metadata_y\n", - " metadata_scrollY\n", + " metadata_event\n", + " metadata_targetEl\n", + " metadata_roomType\n", + " metadata_price\n", + " metadata_nights\n", + " metadata_targetUrl\n", " \n", " \n", " \n", " \n", " 0\n", - " 1761225843899-qaiwwwyj2o\n", + " 1762434923440-66hdhq8qicd\n", " pageview\n", - " 1761226211163\n", + " 1762434924107\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " /\n", @@ -131,64 +153,87 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 1\n", - " 1761225843899-qaiwwwyj2o\n", + " 1762434923440-66hdhq8qicd\n", " click\n", - " 1761226218090\n", - " MAIN\n", + " 1762434925198\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " /\n", " NaN\n", - " 815.0\n", - " 331.0\n", + " 1098.0\n", + " 663.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " \n", " \n", " 2\n", - " 1761225843899-qaiwwwyj2o\n", + " 1762434923440-66hdhq8qicd\n", " click\n", - " 1761226220890\n", + " 1762434925371\n", " MAIN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " /\n", " NaN\n", - " 1129.0\n", - " 605.0\n", + " 1098.0\n", + " 663.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " \n", " \n", " 3\n", - " 1761225843899-qaiwwwyj2o\n", - " click\n", - " 1761226225801\n", - " DIV\n", + " 1762434923440-66hdhq8qicd\n", + " pageview\n", + " 1762437192910\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " /\n", + " \n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", - " 532.0\n", - " 545.0\n", " NaN\n", " \n", " \n", " 4\n", - " 1761225843899-qaiwwwyj2o\n", - " click\n", - " 1761226229364\n", - " DIV\n", - " NaN\n", - " /\n", - " NaN\n", - " 481.0\n", - " 399.0\n", - " NaN\n", - " \n", - " \n", - " 5\n", - " 1761227236286-e7mphcvw6t\n", + " 1762434923440-66hdhq8qicd\n", " pageview\n", - " 1761227236426\n", + " 1762437198539\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " /\n", @@ -196,38 +241,131 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 6\n", - " 1761227236286-e7mphcvw6t\n", + " 390\n", + " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", " click\n", - " 1761227239328\n", + " 1762443115648\n", " DIV\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " /\n", " NaN\n", - " 202.0\n", - " 351.0\n", + " 245.0\n", + " 595.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " \n", " \n", - " 7\n", - " 1761227236286-e7mphcvw6t\n", + " 391\n", + " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", " click\n", - " 1761227244783\n", - " A\n", - " https://vercel.com/new?utm_source=create-next-...\n", + " 1762443174606\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " /\n", " NaN\n", - " 377.0\n", - " 723.0\n", + " 475.0\n", + " 428.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " \n", " \n", - " 8\n", - " 1761828056433-0gz7aboz86h\n", + " 392\n", + " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", + " click\n", + " 1762443183406\n", + " INPUT\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 832.0\n", + " 219.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 393\n", + " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", + " click\n", + " 1762443208588\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 485.0\n", + " 155.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 394\n", + " d423ce8a-77aa-4c9a-94d4-d1adddcc3472\n", + " click\n", + " 1762443225474\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 281.0\n", + " 281.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 407\n", + " 1762444018243-0120z6z5u42f\n", " pageview\n", - " 1761828261783\n", + " 1762444018256\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " /\n", @@ -235,111 +373,381 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 9\n", - " 1761828056433-0gz7aboz86h\n", + " 408\n", + " 1762444018243-0120z6z5u42f\n", " click\n", - " 1761828266484\n", - " H1\n", + " 1762445774344\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " /\n", " NaN\n", - " 527.0\n", - " 169.0\n", + " 299.0\n", + " 214.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " \n", " \n", - " 10\n", - " 1761828056433-0gz7aboz86h\n", - " scroll\n", - " 1761828270314\n", + " 431\n", + " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", + " pageview\n", + " 1762448190973\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " /\n", + " \n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 51.666668\n", " \n", " \n", - " 11\n", - " 1761828056433-0gz7aboz86h\n", - " scroll\n", - " 1761828270328\n", + " 432\n", + " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", + " click\n", + " 1762448192425\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " /\n", " NaN\n", + " 1623.0\n", + " 493.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", - " 50.000000\n", " \n", " \n", - " 12\n", - " 1761828056433-0gz7aboz86h\n", - " scroll\n", - " 1761828270336\n", + " 433\n", + " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", + " click\n", + " 1762448192645\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " /\n", " NaN\n", + " 1623.0\n", + " 493.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 434\n", + " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", + " pageview\n", + " 1762448205850\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " \n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 435\n", + " 214d9fad-9b00-40c3-bd0e-7739b6acd654\n", + " click\n", + " 1762448207922\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 421.0\n", + " 216.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 438\n", + " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", + " pageview\n", + " 1762448283244\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " \n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 439\n", + " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", + " click\n", + " 1762448295524\n", + " HTML\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 614.0\n", + " 720.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 440\n", + " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", + " click\n", + " 1762448342763\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 416.0\n", + " 397.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 441\n", + " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", + " pageview\n", + " 1762448343396\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " \n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 442\n", + " f0d40ca6-c1d3-4ecd-beb3-796adc74349d\n", + " click\n", + " 1762448829631\n", + " DIV\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " /\n", + " NaN\n", + " 45.0\n", + " 44.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", - " 49.166668\n", " \n", " \n", "\n", "" ], "text/plain": [ - " sessionId eventType ts targetEl \\\n", - "0 1761225843899-qaiwwwyj2o pageview 1761226211163 NaN \n", - "1 1761225843899-qaiwwwyj2o click 1761226218090 MAIN \n", - "2 1761225843899-qaiwwwyj2o click 1761226220890 MAIN \n", - "3 1761225843899-qaiwwwyj2o click 1761226225801 DIV \n", - "4 1761225843899-qaiwwwyj2o click 1761226229364 DIV \n", - "5 1761227236286-e7mphcvw6t pageview 1761227236426 NaN \n", - "6 1761227236286-e7mphcvw6t click 1761227239328 DIV \n", - "7 1761227236286-e7mphcvw6t click 1761227244783 A \n", - "8 1761828056433-0gz7aboz86h pageview 1761828261783 NaN \n", - "9 1761828056433-0gz7aboz86h click 1761828266484 H1 \n", - "10 1761828056433-0gz7aboz86h scroll 1761828270314 NaN \n", - "11 1761828056433-0gz7aboz86h scroll 1761828270328 NaN \n", - "12 1761828056433-0gz7aboz86h scroll 1761828270336 NaN \n", + " sessionId eventType ts targetEl \\\n", + "0 1762434923440-66hdhq8qicd pageview 1762434924107 NaN \n", + "1 1762434923440-66hdhq8qicd click 1762434925198 DIV \n", + "2 1762434923440-66hdhq8qicd click 1762434925371 MAIN \n", + "3 1762434923440-66hdhq8qicd pageview 1762437192910 NaN \n", + "4 1762434923440-66hdhq8qicd pageview 1762437198539 NaN \n", + "390 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443115648 DIV \n", + "391 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443174606 DIV \n", + "392 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443183406 INPUT \n", + "393 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443208588 DIV \n", + "394 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443225474 DIV \n", + "407 1762444018243-0120z6z5u42f pageview 1762444018256 NaN \n", + "408 1762444018243-0120z6z5u42f click 1762445774344 DIV \n", + "431 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448190973 NaN \n", + "432 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192425 DIV \n", + "433 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192645 DIV \n", + "434 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448205850 NaN \n", + "435 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448207922 DIV \n", + "438 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448283244 NaN \n", + "439 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448295524 HTML \n", + "440 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448342763 DIV \n", + "441 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448343396 NaN \n", + "442 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448829631 DIV \n", "\n", - " targetUrl metadata_path \\\n", - "0 NaN / \n", - "1 NaN / \n", - "2 NaN / \n", - "3 NaN / \n", - "4 NaN / \n", - "5 NaN / \n", - "6 NaN / \n", - "7 https://vercel.com/new?utm_source=create-next-... / \n", - "8 NaN / \n", - "9 NaN / \n", - "10 NaN / \n", - "11 NaN / \n", - "12 NaN / \n", + " eventName page storeMode userAgent productId metadata_path \\\n", + "0 NaN NaN NaN NaN NaN / \n", + "1 NaN NaN NaN NaN NaN / \n", + "2 NaN NaN NaN NaN NaN / \n", + "3 NaN NaN NaN NaN NaN / \n", + "4 NaN NaN NaN NaN NaN / \n", + "390 NaN NaN NaN NaN NaN / \n", + "391 NaN NaN NaN NaN NaN / \n", + "392 NaN NaN NaN NaN NaN / \n", + "393 NaN NaN NaN NaN NaN / \n", + "394 NaN NaN NaN NaN NaN / \n", + "407 NaN NaN NaN NaN NaN / \n", + "408 NaN NaN NaN NaN NaN / \n", + "431 NaN NaN NaN NaN NaN / \n", + "432 NaN NaN NaN NaN NaN / \n", + "433 NaN NaN NaN NaN NaN / \n", + "434 NaN NaN NaN NaN NaN / \n", + "435 NaN NaN NaN NaN NaN / \n", + "438 NaN NaN NaN NaN NaN / \n", + "439 NaN NaN NaN NaN NaN / \n", + "440 NaN NaN NaN NaN NaN / \n", + "441 NaN NaN NaN NaN NaN / \n", + "442 NaN NaN NaN NaN NaN / \n", "\n", - " metadata_referrer metadata_x metadata_y metadata_scrollY \n", - "0 NaN NaN NaN \n", - "1 NaN 815.0 331.0 NaN \n", - "2 NaN 1129.0 605.0 NaN \n", - "3 NaN 532.0 545.0 NaN \n", - "4 NaN 481.0 399.0 NaN \n", - "5 NaN NaN NaN \n", - "6 NaN 202.0 351.0 NaN \n", - "7 NaN 377.0 723.0 NaN \n", - "8 NaN NaN NaN \n", - "9 NaN 527.0 169.0 NaN \n", - "10 NaN NaN NaN 51.666668 \n", - "11 NaN NaN NaN 50.000000 \n", - "12 NaN NaN NaN 49.166668 " + " metadata_referrer metadata_x metadata_y metadata_event \\\n", + "0 NaN NaN NaN \n", + "1 NaN 1098.0 663.0 NaN \n", + "2 NaN 1098.0 663.0 NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "390 NaN 245.0 595.0 NaN \n", + "391 NaN 475.0 428.0 NaN \n", + "392 NaN 832.0 219.0 NaN \n", + "393 NaN 485.0 155.0 NaN \n", + "394 NaN 281.0 281.0 NaN \n", + "407 NaN NaN NaN \n", + "408 NaN 299.0 214.0 NaN \n", + "431 NaN NaN NaN \n", + "432 NaN 1623.0 493.0 NaN \n", + "433 NaN 1623.0 493.0 NaN \n", + "434 NaN NaN NaN \n", + "435 NaN 421.0 216.0 NaN \n", + "438 NaN NaN NaN \n", + "439 NaN 614.0 720.0 NaN \n", + "440 NaN 416.0 397.0 NaN \n", + "441 NaN NaN NaN \n", + "442 NaN 45.0 44.0 NaN \n", + "\n", + " metadata_targetEl metadata_roomType metadata_price metadata_nights \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "390 NaN NaN NaN NaN \n", + "391 NaN NaN NaN NaN \n", + "392 NaN NaN NaN NaN \n", + "393 NaN NaN NaN NaN \n", + "394 NaN NaN NaN NaN \n", + "407 NaN NaN NaN NaN \n", + "408 NaN NaN NaN NaN \n", + "431 NaN NaN NaN NaN \n", + "432 NaN NaN NaN NaN \n", + "433 NaN NaN NaN NaN \n", + "434 NaN NaN NaN NaN \n", + "435 NaN NaN NaN NaN \n", + "438 NaN NaN NaN NaN \n", + "439 NaN NaN NaN NaN \n", + "440 NaN NaN NaN NaN \n", + "441 NaN NaN NaN NaN \n", + "442 NaN NaN NaN NaN \n", + "\n", + " metadata_targetUrl \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "390 NaN \n", + "391 NaN \n", + "392 NaN \n", + "393 NaN \n", + "394 NaN \n", + "407 NaN \n", + "408 NaN \n", + "431 NaN \n", + "432 NaN \n", + "433 NaN \n", + "434 NaN \n", + "435 NaN \n", + "438 NaN \n", + "439 NaN \n", + "440 NaN \n", + "441 NaN \n", + "442 NaN " ] }, - "execution_count": 87, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -350,19 +758,21 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 12, "id": "380eca5f-8304-4fb2-be32-e8bcfd312085", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['1761225843899-qaiwwwyj2o',\n", - " '1761828056433-0gz7aboz86h',\n", - " '1761227236286-e7mphcvw6t']" + "['214d9fad-9b00-40c3-bd0e-7739b6acd654',\n", + " '1762444018243-0120z6z5u42f',\n", + " 'f0d40ca6-c1d3-4ecd-beb3-796adc74349d',\n", + " 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472',\n", + " '1762434923440-66hdhq8qicd']" ] }, - "execution_count": 88, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -373,7 +783,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 13, "id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1", "metadata": {}, "outputs": [], @@ -383,14 +793,14 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 17, "id": "050d90a4-20a9-47f5-b998-c31178a54cb3", "metadata": {}, "outputs": [], "source": [ "def build_transition_prob_matrix(df: pd.DataFrame):\n", - " df = df.dropna(subset=['eventType'])\n", - " events = df['eventType'].tolist()\n", + " df = df.dropna(subset=['eventName'])\n", + " events = df['eventName'].tolist()\n", " labels = pd.Index(events).unique().tolist()\n", " idx = {e:i for i,e in enumerate(labels)}\n", " M = np.zeros((len(labels), len(labels)), dtype=float)\n", @@ -404,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 18, "id": "e68f9004-82f5-4826-aece-e3dc6e15a18f", "metadata": {}, "outputs": [], @@ -466,7 +876,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 19, "id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc", "metadata": {}, "outputs": [ @@ -479,41 +889,15 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "pageview\n", - "\n", - "pageview\n", - "\n", - "\n", - "\n", - "click\n", - "\n", - "click\n", - "\n", - "\n", - "\n", - "pageview->click\n", - "\n", - "\n", - "1.0\n", - "\n", - "\n", - "\n", - "click->click\n", - "\n", - "\n", - "1.0\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -523,8 +907,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[0. 1.]\n", - " [0. 1.]]\n" + "[]\n" ] }, { @@ -536,75 +919,169 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "pageview\n", - "\n", - "pageview\n", + "\n", + "\n", + "\n", "\n", - "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "page_view\n", + "\n", + "page_view\n", + "\n", + "\n", "\n", - "pageview->pageview\n", - "\n", - "\n", - "0.2\n", + "page_view->page_view\n", + "\n", + "\n", + "0.70\n", "\n", "\n", "\n", "click\n", - "\n", - "click\n", + "\n", + "click\n", "\n", - "\n", + "\n", "\n", - "pageview->click\n", - "\n", - "\n", - "0.8\n", + "page_view->click\n", + "\n", + "\n", + "0.17\n", "\n", - "\n", - "\n", - "click->pageview\n", - "\n", - "\n", - "0.3\n", - "\n", - "\n", - "\n", - "click->click\n", - "\n", - "\n", - "0.6\n", - "\n", - "\n", + "\n", "\n", - "scroll\n", - "\n", - "scroll\n", + "product_hover\n", + "\n", + "product_hover\n", "\n", - "\n", + "\n", + "\n", + "page_view->product_hover\n", + "\n", + "\n", + "0.13\n", + "\n", + "\n", + "\n", + "click->page_view\n", + "\n", + "\n", + "0.35\n", + "\n", + "\n", "\n", - "click->scroll\n", - "\n", - "\n", - "0.1\n", + "click->click\n", + "\n", + "\n", + "0.41\n", "\n", - "\n", + "\n", "\n", - "scroll->scroll\n", - "\n", - "\n", - "1.0\n", + "click->product_hover\n", + "\n", + "\n", + "0.24\n", + "\n", + "\n", + "\n", + "product_hover->click\n", + "\n", + "\n", + "0.07\n", + "\n", + "\n", + "\n", + "product_hover->product_hover\n", + "\n", + "\n", + "0.60\n", + "\n", + "\n", + "\n", + "product_view\n", + "\n", + "product_view\n", + "\n", + "\n", + "\n", + "product_hover->product_view\n", + "\n", + "\n", + "0.33\n", + "\n", + "\n", + "\n", + "product_view->click\n", + "\n", + "\n", + "1.00\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -614,9 +1091,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[0.25 0.75 0. ]\n", - " [0.28571429 0.57142857 0.14285714]\n", - " [0. 0.00826446 0.99173554]]\n" + "[[0.69565217 0.17391304 0.13043478 0. ]\n", + " [0.35294118 0.41176471 0.23529412 0. ]\n", + " [0. 0.06666667 0.6 0.33333333]\n", + " [0. 1. 0. 0. ]]\n" ] }, { @@ -628,41 +1106,15 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "pageview\n", - "\n", - "pageview\n", - "\n", - "\n", - "\n", - "click\n", - "\n", - "click\n", - "\n", - "\n", - "\n", - "pageview->click\n", - "\n", - "\n", - "1.0\n", - "\n", - "\n", - "\n", - "click->click\n", - "\n", - "\n", - "1.0\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -672,14 +1124,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[0. 1.]\n", - " [0. 1.]]\n" + "[]\n" ] } ], "source": [ "def explore_session(session_id: str):\n", - " subset = df[df['sessionId'] == session_id] # not .where(...)\n", + " subset = df[df['sessionId'] == session_id]\n", " P, labels = build_transition_prob_matrix(subset)\n", " g = render_graph(f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n", " display(g)\n", diff --git a/web/src/app/api/ingest/route.ts b/web/src/app/api/ingest/route.ts new file mode 100644 index 0000000..83a68e1 --- /dev/null +++ b/web/src/app/api/ingest/route.ts @@ -0,0 +1,33 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { sendEvent } from '@/lib/kafka'; +import type { EventBase } from '@/lib/events'; + +export async function POST(req: NextRequest) { + try { + const body = await req.json(); + + const storeMode = process.env.STORE_MODE || 'hotel'; + const userAgent = req.headers.get('user-agent') || undefined; + + const event: EventBase = { + ...body, + storeMode, + userAgent, + ts: body.ts || new Date().toISOString(), + }; + + await sendEvent(event); + + if (process.env.NEXT_PUBLIC_APP_ENV === 'dev') { + console.log('[ingest]', event); + } + + return NextResponse.json({ success: true }); + } catch (err: any) { + console.error('[ingest error]', err); + return NextResponse.json( + { error: err.message || 'unknown error' }, + { status: 500 } + ); + } +} diff --git a/web/src/app/api/track/route.ts b/web/src/app/api/track/route.ts deleted file mode 100644 index 1ccd720..0000000 --- a/web/src/app/api/track/route.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { NextRequest, NextResponse } from 'next/server'; -import { sendInteractionEvent } from '@/lib/kafka'; - -export async function POST(req: NextRequest) { - try { - const body = await req.json(); - const { sessionId, eventType, targetEl, targetUrl, metadata } = body; - - if (!sessionId || !eventType) { - return NextResponse.json( - { error: 'sessionId and eventType required' }, - { status: 400 } - ); - } - - await sendInteractionEvent({ - sessionId, - eventType, - targetEl, - targetUrl, - metadata, - ts: Date.now(), - }); - - return NextResponse.json({ success: true }); - } catch (err: any) { - console.error('track error:', err); - return NextResponse.json( - { error: err.message || 'unknown error' }, - { status: 500 } - ); - } -} diff --git a/web/src/components/ui/index.ts b/web/src/components/ui/index.ts index 3180ea1..d3734cb 100644 --- a/web/src/components/ui/index.ts +++ b/web/src/components/ui/index.ts @@ -4,3 +4,4 @@ export { default as Input } from './Input'; export { default as DateInput } from './DateInput'; export { default as RadioGroup } from './RadioGroup'; export { default as Dropdown, DropdownCounter } from './Dropdown'; +export { default as Navigation } from './Navigation'; diff --git a/web/src/hooks/useInteractionTracking.ts b/web/src/hooks/useInteractionTracking.ts index 6a0ac5a..26173cd 100644 --- a/web/src/hooks/useInteractionTracking.ts +++ b/web/src/hooks/useInteractionTracking.ts @@ -1,33 +1,34 @@ import { useEffect, useRef, useState } from 'react'; import '@/lib/experiments' // ensure experiments lib is loaded +import type { EventName } from '@/lib/events'; const fetchSessionId = async (): Promise => { - try { - const res = await fetch('/api/session'); - const data = await res.json(); - return data.sessionId || ''; - } catch (err) { - console.error('failed to fetch session:', err); - return ''; - } + try { + const res = await fetch('/api/session'); + const data = await res.json(); + return data.sessionId || ''; + } catch (err) { + console.error('failed to fetch session:', err); + return ''; + } }; const track = async (ev: { - sessionId: string; - eventType: string; - targetEl?: string; - targetUrl?: string; - metadata?: Record; + sessionId: string; + eventName: EventName; + page: string; + productId?: string; + metadata?: Record; }) => { - try { - await fetch('/api/track', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(ev), - }); - } catch (err) { - console.error('track failed:', err); - } + try { + await fetch('/api/ingest', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(ev), + }); + } catch (err) { + console.error('track failed:', err); + } }; export const useInteractionTracking = () => { @@ -44,82 +45,61 @@ export const useInteractionTracking = () => { const handleClick = (e: MouseEvent) => { if (!sidRef.current) return; const tgt = e.target as HTMLElement; + const page = window.location.pathname; track({ sessionId: sidRef.current, - eventType: 'click', - targetEl: tgt.tagName, - targetUrl: tgt instanceof HTMLAnchorElement ? tgt.href : undefined, + eventName: 'click', + page, metadata: { x: e.clientX, y: e.clientY, - path: window.location.pathname, - }, - }); - }; - - const handleScroll = () => { - if (!sidRef.current) return; - track({ - sessionId: sidRef.current, - eventType: 'scroll', - metadata: { - scrollY: window.scrollY, - path: window.location.pathname, + targetEl: tgt.tagName, + targetUrl: tgt instanceof HTMLAnchorElement ? tgt.href : undefined, }, }); }; const handlePageView = () => { if (!sidRef.current) return; + const page = window.location.pathname; track({ sessionId: sidRef.current, - eventType: 'pageview', + eventName: 'page_view', + page, metadata: { - path: window.location.pathname, referrer: document.referrer, }, }); }; - enum DefinedInteractions { - ADD_TO_CART = 'add_to_cart', - PURCHASE = 'purchase', - } - - // called when clicking on "Add to Cart" button or "Purchase" button - const handleDefinedInteraction = ( - interactionType: DefinedInteractions, - metadata?: Record - ) => { + // called for canonical events dispatched via custom events + const handleDefinedInteraction = (e: Event) => { if (!sidRef.current) return; + const customEvent = e as CustomEvent<{ + eventName: EventName; + productId?: string; + metadata?: Record; + }>; + const page = window.location.pathname; track({ sessionId: sidRef.current, - eventType: interactionType, - metadata: { - path: window.location.pathname, - ...metadata, - }, + eventName: customEvent.detail.eventName, + page, + productId: customEvent.detail.productId, + metadata: customEvent.detail.metadata, }); }; - const definedInteractionListener = (e: Event) => { - const customEvent = e as CustomEvent; - handleDefinedInteraction(customEvent.detail.interactionType, customEvent.detail.metadata); - }; - // wait for session to be ready before tracking if (!ready) return; handlePageView(); document.addEventListener('click', handleClick); - document.addEventListener('definedInteraction', definedInteractionListener); - // TOO NOISY: enable if needed but tbh not worth it - //window.addEventListener('scroll', handleScroll, { passive: true }); + document.addEventListener('definedInteraction', handleDefinedInteraction); return () => { document.removeEventListener('click', handleClick); - document.removeEventListener('definedInteraction', definedInteractionListener); - //window.removeEventListener('scroll', handleScroll); + document.removeEventListener('definedInteraction', handleDefinedInteraction); }; }, [ready]); }; diff --git a/web/src/lib/kafka.ts b/web/src/lib/kafka.ts index f6abea9..4228a15 100644 --- a/web/src/lib/kafka.ts +++ b/web/src/lib/kafka.ts @@ -1,42 +1,35 @@ import { Kafka, Producer } from 'kafkajs'; +import type { EventBase } from './events'; let producer: Producer | null = null; const kafka = new Kafka({ - clientId: 'phantom-web', - brokers: [`${process.env.KAFKA_HOST || 'localhost'}:${process.env.KAFKA_PORT || '9092'}`], + clientId: 'phantom-web', + brokers: [`${process.env.KAFKA_HOST || 'localhost'}:${process.env.KAFKA_PORT || '9092'}`], }); export const getProducer = async (): Promise => { - if (!producer) { - producer = kafka.producer(); - await producer.connect(); - } - return producer; + if (!producer) { + producer = kafka.producer(); + await producer.connect(); + } + return producer; }; -export const sendInteractionEvent = async (ev: { - sessionId: string; - eventType: string; - targetEl?: string; - targetUrl?: string; - metadata?: Record; - ts: number; -}) => { - const p = await getProducer(); - // add to the metadata - await p.send({ - topic: 'user-interactions', - messages: [{ - key: ev.sessionId, - value: JSON.stringify(ev), - }], - }); +export const sendEvent = async (ev: EventBase) => { + const p = await getProducer(); + await p.send({ + topic: 'user-interactions', + messages: [{ + key: ev.sessionId, + value: JSON.stringify(ev), + }], + }); }; export const disconnect = async () => { - if (producer) { - await producer.disconnect(); - producer = null; - } + if (producer) { + await producer.disconnect(); + producer = null; + } }; diff --git a/web/src/middleware.ts b/web/src/middleware.ts index be050cd..6df6eb7 100644 --- a/web/src/middleware.ts +++ b/web/src/middleware.ts @@ -4,9 +4,10 @@ export function middleware(req: NextRequest) { const mode = process.env.STORE_MODE; const { pathname } = req.nextUrl; - // skip rewrites for api routes, static files, and next internals + // skip rewrites for api routes, admin routes, static files, and next internals if ( pathname.startsWith('/api') || + pathname.startsWith('/admin') || pathname.startsWith('/_next') || pathname.startsWith('/static') || pathname.includes('.')