Files
PHANTOM/experiments/data_export.ipynb
2025-11-03 18:53:17 +01:00

722 lines
29 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 98,
"id": "62eafcd9-5462-4063-8873-0e7fb9add907",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from kafka import KafkaConsumer\n",
"import pandas as pd\n",
"import json\n",
"import numpy as np\n",
"import os\n",
"from dotenv import load_dotenv\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display, SVG, Image\n",
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 141 entries, 0 to 140\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 sessionId 141 non-null object \n",
" 1 eventType 141 non-null object \n",
" 2 ts 141 non-null int64 \n",
" 3 targetEl 14 non-null object \n",
" 4 targetUrl 1 non-null object \n",
" 5 metadata_path 141 non-null object \n",
" 6 metadata_referrer 6 non-null object \n",
" 7 metadata_x 14 non-null float64\n",
" 8 metadata_y 14 non-null float64\n",
" 9 metadata_scrollY 121 non-null float64\n",
"dtypes: float64(3), int64(1), object(6)\n",
"memory usage: 11.1+ KB\n"
]
}
],
"source": [
"KAFKA_PORT=os.getenv(\"KAFKA_PORT\", 9092)\n",
"topic = \"user-interactions\"\n",
"consumer = KafkaConsumer(\n",
" topic, \n",
" enable_auto_commit=True,\n",
" value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n",
" auto_offset_reset='earliest',\n",
" bootstrap_servers=['localhost:9092'])\n",
"messages=consumer.poll(timeout_ms=1000,max_records=10000)\n",
"df = []\n",
"for m in messages.values():\n",
" for i in m:\n",
" df.append(i.value)\n",
"df = pd.DataFrame(df)\n",
"# explode metadata col json\n",
"df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "f6819a1c-32ab-49c7-845b-5df7bf60f561",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sessionId</th>\n",
" <th>eventType</th>\n",
" <th>ts</th>\n",
" <th>targetEl</th>\n",
" <th>targetUrl</th>\n",
" <th>metadata_path</th>\n",
" <th>metadata_referrer</th>\n",
" <th>metadata_x</th>\n",
" <th>metadata_y</th>\n",
" <th>metadata_scrollY</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1761225843899-qaiwwwyj2o</td>\n",
" <td>pageview</td>\n",
" <td>1761226211163</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1761225843899-qaiwwwyj2o</td>\n",
" <td>click</td>\n",
" <td>1761226218090</td>\n",
" <td>MAIN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>815.0</td>\n",
" <td>331.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1761225843899-qaiwwwyj2o</td>\n",
" <td>click</td>\n",
" <td>1761226220890</td>\n",
" <td>MAIN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>1129.0</td>\n",
" <td>605.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1761225843899-qaiwwwyj2o</td>\n",
" <td>click</td>\n",
" <td>1761226225801</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>532.0</td>\n",
" <td>545.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1761225843899-qaiwwwyj2o</td>\n",
" <td>click</td>\n",
" <td>1761226229364</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>481.0</td>\n",
" <td>399.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1761227236286-e7mphcvw6t</td>\n",
" <td>pageview</td>\n",
" <td>1761227236426</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1761227236286-e7mphcvw6t</td>\n",
" <td>click</td>\n",
" <td>1761227239328</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>202.0</td>\n",
" <td>351.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1761227236286-e7mphcvw6t</td>\n",
" <td>click</td>\n",
" <td>1761227244783</td>\n",
" <td>A</td>\n",
" <td>https://vercel.com/new?utm_source=create-next-...</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>377.0</td>\n",
" <td>723.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1761828056433-0gz7aboz86h</td>\n",
" <td>pageview</td>\n",
" <td>1761828261783</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1761828056433-0gz7aboz86h</td>\n",
" <td>click</td>\n",
" <td>1761828266484</td>\n",
" <td>H1</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>527.0</td>\n",
" <td>169.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1761828056433-0gz7aboz86h</td>\n",
" <td>scroll</td>\n",
" <td>1761828270314</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>51.666668</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>1761828056433-0gz7aboz86h</td>\n",
" <td>scroll</td>\n",
" <td>1761828270328</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>1761828056433-0gz7aboz86h</td>\n",
" <td>scroll</td>\n",
" <td>1761828270336</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>49.166668</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sessionId eventType ts targetEl \\\n",
"0 1761225843899-qaiwwwyj2o pageview 1761226211163 NaN \n",
"1 1761225843899-qaiwwwyj2o click 1761226218090 MAIN \n",
"2 1761225843899-qaiwwwyj2o click 1761226220890 MAIN \n",
"3 1761225843899-qaiwwwyj2o click 1761226225801 DIV \n",
"4 1761225843899-qaiwwwyj2o click 1761226229364 DIV \n",
"5 1761227236286-e7mphcvw6t pageview 1761227236426 NaN \n",
"6 1761227236286-e7mphcvw6t click 1761227239328 DIV \n",
"7 1761227236286-e7mphcvw6t click 1761227244783 A \n",
"8 1761828056433-0gz7aboz86h pageview 1761828261783 NaN \n",
"9 1761828056433-0gz7aboz86h click 1761828266484 H1 \n",
"10 1761828056433-0gz7aboz86h scroll 1761828270314 NaN \n",
"11 1761828056433-0gz7aboz86h scroll 1761828270328 NaN \n",
"12 1761828056433-0gz7aboz86h scroll 1761828270336 NaN \n",
"\n",
" targetUrl metadata_path \\\n",
"0 NaN / \n",
"1 NaN / \n",
"2 NaN / \n",
"3 NaN / \n",
"4 NaN / \n",
"5 NaN / \n",
"6 NaN / \n",
"7 https://vercel.com/new?utm_source=create-next-... / \n",
"8 NaN / \n",
"9 NaN / \n",
"10 NaN / \n",
"11 NaN / \n",
"12 NaN / \n",
"\n",
" metadata_referrer metadata_x metadata_y metadata_scrollY \n",
"0 NaN NaN NaN \n",
"1 NaN 815.0 331.0 NaN \n",
"2 NaN 1129.0 605.0 NaN \n",
"3 NaN 532.0 545.0 NaN \n",
"4 NaN 481.0 399.0 NaN \n",
"5 NaN NaN NaN \n",
"6 NaN 202.0 351.0 NaN \n",
"7 NaN 377.0 723.0 NaN \n",
"8 NaN NaN NaN \n",
"9 NaN 527.0 169.0 NaN \n",
"10 NaN NaN NaN 51.666668 \n",
"11 NaN NaN NaN 50.000000 \n",
"12 NaN NaN NaN 49.166668 "
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby('sessionId').head()"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "380eca5f-8304-4fb2-be32-e8bcfd312085",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['1761225843899-qaiwwwyj2o',\n",
" '1761828056433-0gz7aboz86h',\n",
" '1761227236286-e7mphcvw6t']"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sessions = list(set(df['sessionId'])); sessions"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1",
"metadata": {},
"outputs": [],
"source": [
"# map sessions to experiments"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "050d90a4-20a9-47f5-b998-c31178a54cb3",
"metadata": {},
"outputs": [],
"source": [
"def build_transition_prob_matrix(df: pd.DataFrame):\n",
" df = df.dropna(subset=['eventType'])\n",
" events = df['eventType'].tolist()\n",
" labels = pd.Index(events).unique().tolist()\n",
" idx = {e:i for i,e in enumerate(labels)}\n",
" M = np.zeros((len(labels), len(labels)), dtype=float)\n",
" for a, b in zip(events, events[1:]):\n",
" M[idx[a], idx[b]] += 1\n",
" row_sums = M.sum(axis=1, keepdims=True)\n",
" with np.errstate(divide='ignore', invalid='ignore'):\n",
" P = np.divide(M, row_sums, where=row_sums>0) # row-normalized\n",
" return P, labels"
]
},
{
"cell_type": "code",
"execution_count": 107,
"id": "e68f9004-82f5-4826-aece-e3dc6e15a18f",
"metadata": {},
"outputs": [],
"source": [
"# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n",
"from graphviz import Digraph\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"def _as_prob_df(matrix, labels=None):\n",
" \"\"\"Return a square DataFrame with index=columns=labels.\"\"\"\n",
" if isinstance(matrix, pd.DataFrame):\n",
" # Ensure square and aligned\n",
" assert (matrix.index == matrix.columns).all(), \"Index/columns must match.\"\n",
" return matrix\n",
" matrix = np.asarray(matrix, dtype=float)\n",
" assert matrix.shape[0] == matrix.shape[1], \"Matrix must be square.\"\n",
" if labels is None:\n",
" raise ValueError(\"labels are required when matrix is not a DataFrame\")\n",
" assert len(labels) == matrix.shape[0], \"labels length must match matrix size.\"\n",
" return pd.DataFrame(matrix, index=list(labels), columns=list(labels))\n",
"\n",
"def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n",
" \"\"\"Build weighted edges > threshold.\"\"\"\n",
" edges = []\n",
" for src in P.index:\n",
" for dst in P.columns:\n",
" w = float(P.loc[src, dst])\n",
" if w > threshold:\n",
" edges.append((str(src), str(dst), f\"{w:.{round_digits}f}\"))\n",
" return edges\n",
"\n",
"def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False):\n",
" \"\"\"\n",
" fname: output file stem (no extension)\n",
" matrix: NumPy array or pandas DataFrame of transition PROBABILITIES\n",
" ls_index: ordered labels (required if matrix is not a DataFrame)\n",
" threshold: hide edges with weight <= threshold\n",
" fmt: 'svg'|'png'|'pdf' etc.\n",
" view: open after rendering\n",
" \"\"\"\n",
" P = _as_prob_df(matrix, labels=ls_index)\n",
" edges = _df_to_edgelist(P, threshold=threshold)\n",
"\n",
" g = Digraph(format=fmt)\n",
" g.attr(rankdir=\"LR\", size=\"30\")\n",
" g.attr(\"node\", shape=\"circle\")\n",
"\n",
" # ensure isolated nodes appear\n",
" for node in P.index:\n",
" g.node(str(node), width=\"1\", height=\"1\")\n",
"\n",
" for src, dst, label in edges:\n",
" g.edge(src, dst, label=label)\n",
"\n",
" g.render(fname, view=view, cleanup=True)\n",
" return g\n"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"228pt\" height=\"124pt\"\n",
" viewBox=\"0.00 0.00 228.00 124.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 119.83)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-119.83 223.66,-119.83 223.66,4 -4,4\"/>\n",
"<!-- pageview -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>pageview</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"44.58\" cy=\"-44.58\" rx=\"44.58\" ry=\"44.58\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">pageview</text>\n",
"</g>\n",
"<!-- click -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>click</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"183.66\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
"</g>\n",
"<!-- pageview&#45;&gt;click -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>pageview&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M89.33,-44.58C104.32,-44.58 121.13,-44.58 136.31,-44.58\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"136.04,-48.08 146.04,-44.58 136.04,-41.08 136.04,-48.08\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
"</g>\n",
"<!-- click&#45;&gt;click -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>click&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M171.43,-78.86C171.56,-89.86 175.63,-98.58 183.66,-98.58 188.68,-98.58 192.16,-95.17 194.09,-89.93\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"197.49,-90.78 195.65,-80.35 190.58,-89.66 197.49,-90.78\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7fd404165c70>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0. 1.]\n",
" [0. 1.]]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"358pt\" height=\"132pt\"\n",
" viewBox=\"0.00 0.00 358.00 132.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 128.41)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-128.41 354.16,-128.41 354.16,4 -4,4\"/>\n",
"<!-- pageview -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>pageview</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"44.58\" cy=\"-44.58\" rx=\"44.58\" ry=\"44.58\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">pageview</text>\n",
"</g>\n",
"<!-- pageview&#45;&gt;pageview -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>pageview&#45;&gt;pageview</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M30.86,-87.29C31.64,-98.6 36.22,-107.16 44.58,-107.16 49.94,-107.16 53.74,-103.65 55.99,-98.15\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"59.33,-99.28 57.99,-88.77 52.48,-97.82 59.33,-99.28\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-111.11\" font-family=\"Times,serif\" font-size=\"14.00\">0.2</text>\n",
"</g>\n",
"<!-- click -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>click</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"183.66\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
"</g>\n",
"<!-- pageview&#45;&gt;click -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>pageview&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M89.33,-44.58C104.32,-44.58 121.13,-44.58 136.31,-44.58\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"136.04,-48.08 146.04,-44.58 136.04,-41.08 136.04,-48.08\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.8</text>\n",
"</g>\n",
"<!-- click&#45;&gt;pageview -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>click&#45;&gt;pageview</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M150.74,-29.52C143.93,-26.96 136.67,-24.68 129.66,-23.33 119.02,-21.28 107.71,-22.06 96.96,-24.24\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"96.33,-20.79 87.47,-26.6 98.02,-27.59 96.33,-20.79\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-27.28\" font-family=\"Times,serif\" font-size=\"14.00\">0.3</text>\n",
"</g>\n",
"<!-- click&#45;&gt;click -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>click&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M171.43,-78.86C171.56,-89.86 175.63,-98.58 183.66,-98.58 188.68,-98.58 192.16,-95.17 194.09,-89.93\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"197.49,-90.78 195.65,-80.35 190.58,-89.66 197.49,-90.78\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.6</text>\n",
"</g>\n",
"<!-- scroll -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>scroll</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"314.16\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"314.16\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">scroll</text>\n",
"</g>\n",
"<!-- click&#45;&gt;scroll -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>click&#45;&gt;scroll</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M220.12,-44.58C234.44,-44.58 251.18,-44.58 266.47,-44.58\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"266.31,-48.08 276.31,-44.58 266.31,-41.08 266.31,-48.08\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"248.91\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.1</text>\n",
"</g>\n",
"<!-- scroll&#45;&gt;scroll -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>scroll&#45;&gt;scroll</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M301.93,-78.86C302.06,-89.86 306.13,-98.58 314.16,-98.58 319.18,-98.58 322.66,-95.17 324.59,-89.93\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"327.99,-90.78 326.15,-80.35 321.08,-89.66 327.99,-90.78\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"314.16\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7fd406e21a90>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.25 0.75 0. ]\n",
" [0.28571429 0.57142857 0.14285714]\n",
" [0. 0.00826446 0.99173554]]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"228pt\" height=\"124pt\"\n",
" viewBox=\"0.00 0.00 228.00 124.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 119.83)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-119.83 223.66,-119.83 223.66,4 -4,4\"/>\n",
"<!-- pageview -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>pageview</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"44.58\" cy=\"-44.58\" rx=\"44.58\" ry=\"44.58\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">pageview</text>\n",
"</g>\n",
"<!-- click -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>click</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"183.66\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
"</g>\n",
"<!-- pageview&#45;&gt;click -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>pageview&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M89.33,-44.58C104.32,-44.58 121.13,-44.58 136.31,-44.58\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"136.04,-48.08 146.04,-44.58 136.04,-41.08 136.04,-48.08\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
"</g>\n",
"<!-- click&#45;&gt;click -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>click&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M171.43,-78.86C171.56,-89.86 175.63,-98.58 183.66,-98.58 188.68,-98.58 192.16,-95.17 194.09,-89.93\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"197.49,-90.78 195.65,-80.35 190.58,-89.66 197.49,-90.78\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7fd4041662b0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0. 1.]\n",
" [0. 1.]]\n"
]
}
],
"source": [
"def explore_session(session_id: str):\n",
" subset = df[df['sessionId'] == session_id] # not .where(...)\n",
" P, labels = build_transition_prob_matrix(subset)\n",
" g = render_graph(f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n",
" display(g)\n",
" return P\n",
"for session in sessions:\n",
" print(explore_session(session))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d278c2d-406e-4dc0-b219-5f7b236e852b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (PHANTOM)",
"language": "python",
"name": "phantom"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}