diff --git a/experiments/data_export.ipynb b/experiments/data_export.ipynb deleted file mode 100644 index 7cd9366..0000000 --- a/experiments/data_export.ipynb +++ /dev/null @@ -1,957 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 10, - "id": "62eafcd9-5462-4063-8873-0e7fb9add907", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from kafka import KafkaConsumer\n", - "import pandas as pd\n", - "import json\n", - "import numpy as np\n", - "import os\n", - "from dotenv import load_dotenv\n", - "import matplotlib.pyplot as plt\n", - "from IPython.display import display, SVG, Image\n", - "load_dotenv()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 73 entries, 0 to 72\n", - "Data columns (total 13 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 sessionId 73 non-null object \n", - " 1 eventName 73 non-null object \n", - " 2 page 73 non-null object \n", - " 3 productId 67 non-null object \n", - " 4 storeMode 73 non-null object \n", - " 5 userAgent 73 non-null object \n", - " 6 ts 73 non-null object \n", - " 7 metadata_referrer 6 non-null object \n", - " 8 metadata_roomType 45 non-null object \n", - " 9 metadata_price 45 non-null float64\n", - " 10 metadata_nights 45 non-null float64\n", - " 11 metadata_elementText 22 non-null object \n", - " 12 metadata_dwellTime 22 non-null float64\n", - "dtypes: float64(3), object(10)\n", - "memory usage: 7.5+ KB\n" - ] - } - ], - "source": [ - "KAFKA_PORT=os.getenv(\"KAFKA_PORT\", 9092)\n", - "topic = \"user-interactions\"\n", - "consumer = KafkaConsumer(\n", - " topic, \n", - " enable_auto_commit=True,\n", - " value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n", - " auto_offset_reset='earliest', \n", - " bootstrap_servers=['localhost:9092'])\n", - "messages=consumer.poll(timeout_ms=1000,max_records=10000)\n", - "df = []\n", - "for m in messages.values():\n", - " for i in m:\n", - " df.append(i.value)\n", - "df = pd.DataFrame(df)\n", - "# explode metadata col json\n", - "df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n", - "df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "f6819a1c-32ab-49c7-845b-5df7bf60f561", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sessionIdeventNamepageproductIdstoreModeuserAgenttsmetadata_referrermetadata_roomTypemetadata_pricemetadata_nightsmetadata_elementTextmetadata_dwellTime
0d176d7c9-4027-4702-9e31-2a71395cdda0page_view/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:23:46.270ZNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1f0317a5d-e424-44e9-b784-c8f7291ffe31page_view/NonehotelMozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...2025-11-14T13:26:00.291ZNaNNaNNaNNaNNaN
2f0317a5d-e424-44e9-b784-c8f7291ffe31page_view/productsNonehotelMozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...2025-11-14T13:26:07.769ZNaNNaNNaNNaNNaN
3f0317a5d-e424-44e9-b784-c8f7291ffe31view_item_page/productshtl-0hotelMozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...2025-11-14T13:26:15.010ZNaNPremium Room269.01.0NaNNaN
4238dc588-a7ab-4c0e-bccd-6abca5076c66page_view/productsNonehotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:15.457ZNaNNaNNaNNaNNaN
5238dc588-a7ab-4c0e-bccd-6abca5076c66view_item_page/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:15.591ZNaNPremium Room264.02.0NaNNaNNaNNaNNaNNaNNaN
432214d9fad-9b00-40c3-bd0e-7739b6acd654click1762448192425DIVNaNNaNNaNNaNNaN/NaN1623.0493.0NaNNaNNaNNaNNaNNaN
6238dc588-a7ab-4c0e-bccd-6abca5076c66view_item_page/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:21.483ZNaNPremium Room264.02.0NaNNaN
7238dc588-a7ab-4c0e-bccd-6abca5076c66hover_over_title/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:22.646ZNaNNaNNaNNaNGrand Plaza Hotel1200.0
8238dc588-a7ab-4c0e-bccd-6abca5076c66view_item_page/productshtl-0hotelMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...2025-11-14T13:27:25.889ZNaNPremium Room264.02.0NaNNaN
35013fc334-4045-4d5a-8739-dd0a8766a63bpage_view/productsNonehotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:53:59.993ZNaNNaNNaNNaNNaN
36013fc334-4045-4d5a-8739-dd0a8766a63bview_item_page/productshtl-0hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:10.705ZNaNPremium Room223.03.0NaNNaN
37013fc334-4045-4d5a-8739-dd0a8766a63bhover_over_title/productshtl-0hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:11.771ZNaNNaN416.0397.0NaNNaNNaNNaNNaNNaNGrand Plaza Hotel1200.0
38013fc334-4045-4d5a-8739-dd0a8766a63bview_item_page/productshtl-1hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:29.772ZNaNStandard Room267.05.0NaNNaN
39013fc334-4045-4d5a-8739-dd0a8766a63bhover_over_title/productshtl-1hotelMozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...2025-11-14T13:54:30.833ZNaNNaNNaNNaNSeaside Resort1200.0
\n", - "
" - ], - "text/plain": [ - " sessionId eventName page \\\n", - "0 d176d7c9-4027-4702-9e31-2a71395cdda0 page_view /products \n", - "1 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view / \n", - "2 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view /products \n", - "3 f0317a5d-e424-44e9-b784-c8f7291ffe31 view_item_page /products \n", - "4 238dc588-a7ab-4c0e-bccd-6abca5076c66 page_view /products \n", - "5 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", - "6 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", - "7 238dc588-a7ab-4c0e-bccd-6abca5076c66 hover_over_title /products \n", - "8 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n", - "35 013fc334-4045-4d5a-8739-dd0a8766a63b page_view /products \n", - "36 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n", - "37 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n", - "38 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n", - "39 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n", - "\n", - " productId storeMode userAgent \\\n", - "0 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", - "1 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", - "2 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", - "3 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n", - "4 None hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", - "5 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", - "6 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", - "7 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", - "8 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n", - "35 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", - "36 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", - "37 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", - "38 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", - "39 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n", - "\n", - " ts metadata_referrer metadata_roomType \\\n", - "0 2025-11-14T13:23:46.270Z NaN \n", - "1 2025-11-14T13:26:00.291Z NaN \n", - "2 2025-11-14T13:26:07.769Z NaN \n", - "3 2025-11-14T13:26:15.010Z NaN Premium Room \n", - "4 2025-11-14T13:27:15.457Z NaN \n", - "5 2025-11-14T13:27:15.591Z NaN Premium Room \n", - "6 2025-11-14T13:27:21.483Z NaN Premium Room \n", - "7 2025-11-14T13:27:22.646Z NaN NaN \n", - "8 2025-11-14T13:27:25.889Z NaN Premium Room \n", - "35 2025-11-14T13:53:59.993Z NaN \n", - "36 2025-11-14T13:54:10.705Z NaN Premium Room \n", - "37 2025-11-14T13:54:11.771Z NaN NaN \n", - "38 2025-11-14T13:54:29.772Z NaN Standard Room \n", - "39 2025-11-14T13:54:30.833Z NaN NaN \n", - "\n", - " metadata_price metadata_nights metadata_elementText metadata_dwellTime \n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 269.0 1.0 NaN NaN \n", - "4 NaN NaN NaN NaN \n", - "5 264.0 2.0 NaN NaN \n", - "6 264.0 2.0 NaN NaN \n", - "7 NaN NaN Grand Plaza Hotel 1200.0 \n", - "8 264.0 2.0 NaN NaN \n", - "35 NaN NaN NaN NaN \n", - "36 223.0 3.0 NaN NaN \n", - "37 NaN NaN Grand Plaza Hotel 1200.0 \n", - "38 267.0 5.0 NaN NaN \n", - "39 NaN NaN Seaside Resort 1200.0 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.groupby('sessionId').head()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "380eca5f-8304-4fb2-be32-e8bcfd312085", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['013fc334-4045-4d5a-8739-dd0a8766a63b',\n", - " '238dc588-a7ab-4c0e-bccd-6abca5076c66',\n", - " 'd176d7c9-4027-4702-9e31-2a71395cdda0',\n", - " 'f0317a5d-e424-44e9-b784-c8f7291ffe31']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sessions = list(set(df['sessionId'])); sessions # 238dc588-a7ab-4c0e-bccd-6abca5076c66" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1", - "metadata": {}, - "outputs": [], - "source": [ - "# map sessions to experiments" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "050d90a4-20a9-47f5-b998-c31178a54cb3", - "metadata": {}, - "outputs": [], - "source": [ - "def build_transition_prob_matrix(df: pd.DataFrame):\n", - " df = df.dropna(subset=['eventName'])\n", - " events = df['eventName'].tolist()\n", - " labels = pd.Index(events).unique().tolist()\n", - " idx = {e:i for i,e in enumerate(labels)}\n", - " M = np.zeros((len(labels), len(labels)), dtype=float)\n", - " for a, b in zip(events, events[1:]):\n", - " M[idx[a], idx[b]] += 1\n", - " row_sums = M.sum(axis=1, keepdims=True)\n", - " with np.errstate(divide='ignore', invalid='ignore'):\n", - " P = np.divide(M, row_sums, where=row_sums>0) # row-normalized\n", - " return P, labels" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "e68f9004-82f5-4826-aece-e3dc6e15a18f", - "metadata": {}, - "outputs": [], - "source": [ - "# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n", - "from graphviz import Digraph\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "def _as_prob_df(matrix, labels=None):\n", - " \"\"\"Return a square DataFrame with index=columns=labels.\"\"\"\n", - " if isinstance(matrix, pd.DataFrame):\n", - " # Ensure square and aligned\n", - " assert (matrix.index == matrix.columns).all(), \"Index/columns must match.\"\n", - " return matrix\n", - " matrix = np.asarray(matrix, dtype=float)\n", - " assert matrix.shape[0] == matrix.shape[1], \"Matrix must be square.\"\n", - " if labels is None:\n", - " raise ValueError(\"labels are required when matrix is not a DataFrame\")\n", - " assert len(labels) == matrix.shape[0], \"labels length must match matrix size.\"\n", - " return pd.DataFrame(matrix, index=list(labels), columns=list(labels))\n", - "\n", - "def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n", - " \"\"\"Build weighted edges > threshold.\"\"\"\n", - " edges = []\n", - " for src in P.index:\n", - " for dst in P.columns:\n", - " w = float(P.loc[src, dst])\n", - " if w > threshold:\n", - " edges.append((str(src), str(dst), f\"{w:.{round_digits}f}\"))\n", - " return edges\n", - "\n", - "def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False):\n", - " \"\"\"\n", - " fname: output file stem (no extension)\n", - " matrix: NumPy array or pandas DataFrame of transition PROBABILITIES\n", - " ls_index: ordered labels (required if matrix is not a DataFrame)\n", - " threshold: hide edges with weight <= threshold\n", - " fmt: 'svg'|'png'|'pdf' etc.\n", - " view: open after rendering\n", - " \"\"\"\n", - " P = _as_prob_df(matrix, labels=ls_index)\n", - " edges = _df_to_edgelist(P, threshold=threshold)\n", - "\n", - " g = Digraph(format=fmt)\n", - " g.attr(rankdir=\"LR\", size=\"30\")\n", - " g.attr(\"node\", shape=\"circle\")\n", - "\n", - " # ensure isolated nodes appear\n", - " for node in P.index:\n", - " g.node(str(node), width=\"1\", height=\"1\")\n", - "\n", - " for src, dst, label in edges:\n", - " g.edge(src, dst, label=label)\n", - "\n", - " g.render(fname, view=view, cleanup=True)\n", - " return g\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "013fc334-4045-4d5a-8739-dd0a8766a63b\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "page_view\n", - "\n", - "page_view\n", - "\n", - "\n", - "\n", - "view_item_page\n", - "\n", - "view_item_page\n", - "\n", - "\n", - "\n", - "page_view->view_item_page\n", - "\n", - "\n", - "1.00\n", - "\n", - "\n", - "\n", - "view_item_page->view_item_page\n", - "\n", - "\n", - "0.68\n", - "\n", - "\n", - "\n", - "hover_over_title\n", - "\n", - "hover_over_title\n", - "\n", - "\n", - "\n", - "view_item_page->hover_over_title\n", - "\n", - "\n", - "0.29\n", - "\n", - "\n", - "\n", - "hover_over_paragraph\n", - "\n", - "hover_over_paragraph\n", - "\n", - "\n", - "\n", - "view_item_page->hover_over_paragraph\n", - "\n", - "\n", - "0.04\n", - "\n", - "\n", - "\n", - "hover_over_title->view_item_page\n", - "\n", - "\n", - "1.00\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[]\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0.00000000e+000 1.00000000e+000 0.00000000e+000 0.00000000e+000]\n", - " [0.00000000e+000 6.78571429e-001 2.85714286e-001 3.57142857e-002]\n", - " [0.00000000e+000 1.00000000e+000 0.00000000e+000 0.00000000e+000]\n", - " [2.05833592e-312 2.29175545e-312 4.94065646e-324 6.92110218e-310]]\n", - "238dc588-a7ab-4c0e-bccd-6abca5076c66\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "page_view\n", - "\n", - "page_view\n", - "\n", - "\n", - "\n", - "view_item_page\n", - "\n", - "view_item_page\n", - "\n", - "\n", - "\n", - "page_view->view_item_page\n", - "\n", - "\n", - "1.00\n", - "\n", - "\n", - "\n", - "view_item_page->view_item_page\n", - "\n", - "\n", - "0.19\n", - "\n", - "\n", - "\n", - "hover_over_title\n", - "\n", - "hover_over_title\n", - "\n", - "\n", - "\n", - "view_item_page->hover_over_title\n", - "\n", - "\n", - "0.38\n", - "\n", - "\n", - "\n", - "hover_over_paragraph\n", - "\n", - "hover_over_paragraph\n", - "\n", - "\n", - "\n", - "view_item_page->hover_over_paragraph\n", - "\n", - "\n", - "0.44\n", - "\n", - "\n", - "\n", - "hover_over_title->view_item_page\n", - "\n", - "\n", - "1.00\n", - "\n", - "\n", - "\n", - "hover_over_paragraph->page_view\n", - "\n", - "\n", - "0.14\n", - "\n", - "\n", - "\n", - "hover_over_paragraph->view_item_page\n", - "\n", - "\n", - "0.86\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 1. 0. 0. ]\n", - " [0. 0.1875 0.375 0.4375 ]\n", - " [0. 1. 0. 0. ]\n", - " [0.14285714 0.85714286 0. 0. ]]\n", - "d176d7c9-4027-4702-9e31-2a71395cdda0\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "page_view\n", - "\n", - "page_view\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0.]]\n", - "f0317a5d-e424-44e9-b784-c8f7291ffe31\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "page_view\n", - "\n", - "page_view\n", - "\n", - "\n", - "\n", - "page_view->page_view\n", - "\n", - "\n", - "0.50\n", - "\n", - "\n", - "\n", - "view_item_page\n", - "\n", - "view_item_page\n", - "\n", - "\n", - "\n", - "page_view->view_item_page\n", - "\n", - "\n", - "0.50\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[5.0e-001 5.0e-001]\n", - " [9.9e-324 1.5e-323]]\n" - ] - } - ], - "source": [ - "def explore_session(session_id: str):\n", - " subset = df[df['sessionId'] == session_id]\n", - " print(session_id)\n", - " P, labels = build_transition_prob_matrix(subset)\n", - " g = render_graph(f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n", - " display(g)\n", - " return P\n", - "for session in sessions:\n", - " print(explore_session(session))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (PHANTOM)", - "language": "python", - "name": "phantom" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/web/src/components/feats/airline/AirlineCard.tsx b/web/src/components/feats/airline/AirlineCard.tsx index b08827d..5bc4332 100644 --- a/web/src/components/feats/airline/AirlineCard.tsx +++ b/web/src/components/feats/airline/AirlineCard.tsx @@ -24,19 +24,20 @@ interface Flight { fareRule: FareRule; refundable: boolean; basePrice: number; + dateIndex?: number; } export default function AirlineCard({ flight }: { flight: Flight }) { const durationRef = useHoverTracking({ eventName: 'hover_over_title', productId: flight.id, - metadata: { elementText: flight.duration }, + metadata: { elementText: flight.duration, dateIndex: flight.dateIndex }, }); const priceRef = useHoverTracking({ eventName: 'hover_over_paragraph', productId: flight.id, - metadata: { elementText: 'price' }, + metadata: { elementText: 'price', dateIndex: flight.dateIndex }, }); const handleCardClick = () => { @@ -44,6 +45,7 @@ export default function AirlineCard({ flight }: { flight: Flight }) { cabinClass: flight.cabinClass, fareRule: flight.fareRule, price: flight.basePrice, + dateIndex: flight.dateIndex, }); }; diff --git a/web/src/components/feats/hotel/HotelCard.tsx b/web/src/components/feats/hotel/HotelCard.tsx index 8c68801..c75c9f6 100644 --- a/web/src/components/feats/hotel/HotelCard.tsx +++ b/web/src/components/feats/hotel/HotelCard.tsx @@ -39,13 +39,13 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) { const titleRef = useHoverTracking({ eventName: 'hover_over_title', productId: hotel.id, - metadata: { elementText: hotel.name }, + metadata: { elementText: hotel.name, dateIndex: hotel.dateIndex }, }); const priceRef = useHoverTracking({ eventName: 'hover_over_paragraph', productId: hotel.id, - metadata: { elementText: 'price' }, + metadata: { elementText: 'price', dateIndex: hotel.dateIndex }, }); const handleCardClick = () => { @@ -53,6 +53,7 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) { roomType: hotel.roomType, price: hotel.pricePerNight, nights: hotel.nights, + dateIndex: hotel.dateIndex, }); }; diff --git a/web/src/lib/hotel-utils.ts b/web/src/lib/hotel-utils.ts index 2771412..e660f26 100644 --- a/web/src/lib/hotel-utils.ts +++ b/web/src/lib/hotel-utils.ts @@ -19,6 +19,7 @@ export interface Hotel { roomType: string; checkIn: string; checkOut: string; + dateIndex: number; amenities: string[]; refundable: boolean; pricePerNight: number; @@ -39,6 +40,7 @@ export const transformProduct = (p: HotelProduct): Hotel => { roomType: room_type, checkIn: checkIn.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }), checkOut: checkOut.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }), + dateIndex: date_index, amenities: metadata?.amenities || [], refundable: metadata?.refundable || false, pricePerNight: metadata?.base_price || 100,