mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
722 lines
29 KiB
Plaintext
722 lines
29 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"id": "62eafcd9-5462-4063-8873-0e7fb9add907",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"True"
|
|
]
|
|
},
|
|
"execution_count": 98,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from kafka import KafkaConsumer\n",
|
|
"import pandas as pd\n",
|
|
"import json\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from IPython.display import display, SVG, Image\n",
|
|
"load_dotenv()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 86,
|
|
"id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"RangeIndex: 141 entries, 0 to 140\n",
|
|
"Data columns (total 10 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 sessionId 141 non-null object \n",
|
|
" 1 eventType 141 non-null object \n",
|
|
" 2 ts 141 non-null int64 \n",
|
|
" 3 targetEl 14 non-null object \n",
|
|
" 4 targetUrl 1 non-null object \n",
|
|
" 5 metadata_path 141 non-null object \n",
|
|
" 6 metadata_referrer 6 non-null object \n",
|
|
" 7 metadata_x 14 non-null float64\n",
|
|
" 8 metadata_y 14 non-null float64\n",
|
|
" 9 metadata_scrollY 121 non-null float64\n",
|
|
"dtypes: float64(3), int64(1), object(6)\n",
|
|
"memory usage: 11.1+ KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Broker port comes from the environment (.env is loaded by load_dotenv() above); defaults to 9092.\n",
"KAFKA_PORT = os.getenv(\"KAFKA_PORT\", 9092)\n",
"topic = \"user-interactions\"\n",
"consumer = KafkaConsumer(\n",
"    topic,\n",
"    enable_auto_commit=True,\n",
"    value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n",
"    auto_offset_reset='earliest',\n",
"    bootstrap_servers=[f\"localhost:{KAFKA_PORT}\"])\n",
"try:\n",
"    # One bounded poll; the result maps each partition to the list of records fetched from it.\n",
"    messages = consumer.poll(timeout_ms=1000, max_records=10000)\n",
"finally:\n",
"    # Always release the broker connection so re-running this cell does not leak consumers.\n",
"    consumer.close()\n",
"\n",
"# Flatten the per-partition batches into one list of decoded event dicts.\n",
"records = [msg.value for batch in messages.values() for msg in batch]\n",
"df = pd.DataFrame(records)\n",
"# Explode the nested metadata JSON into flat metadata_* columns.\n",
"# Guarded so that an empty poll yields an empty frame instead of a KeyError.\n",
"if \"metadata\" in df.columns:\n",
"    df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n",
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 87,
|
|
"id": "f6819a1c-32ab-49c7-845b-5df7bf60f561",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>sessionId</th>\n",
|
|
" <th>eventType</th>\n",
|
|
" <th>ts</th>\n",
|
|
" <th>targetEl</th>\n",
|
|
" <th>targetUrl</th>\n",
|
|
" <th>metadata_path</th>\n",
|
|
" <th>metadata_referrer</th>\n",
|
|
" <th>metadata_x</th>\n",
|
|
" <th>metadata_y</th>\n",
|
|
" <th>metadata_scrollY</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1761225843899-qaiwwwyj2o</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1761226211163</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1761225843899-qaiwwwyj2o</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761226218090</td>\n",
|
|
" <td>MAIN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>815.0</td>\n",
|
|
" <td>331.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1761225843899-qaiwwwyj2o</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761226220890</td>\n",
|
|
" <td>MAIN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1129.0</td>\n",
|
|
" <td>605.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1761225843899-qaiwwwyj2o</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761226225801</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>532.0</td>\n",
|
|
" <td>545.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1761225843899-qaiwwwyj2o</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761226229364</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>481.0</td>\n",
|
|
" <td>399.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>1761227236286-e7mphcvw6t</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1761227236426</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>1761227236286-e7mphcvw6t</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761227239328</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>202.0</td>\n",
|
|
" <td>351.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>1761227236286-e7mphcvw6t</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761227244783</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>https://vercel.com/new?utm_source=create-next-...</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>377.0</td>\n",
|
|
" <td>723.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>1761828056433-0gz7aboz86h</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1761828261783</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>1761828056433-0gz7aboz86h</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1761828266484</td>\n",
|
|
" <td>H1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>527.0</td>\n",
|
|
" <td>169.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>1761828056433-0gz7aboz86h</td>\n",
|
|
" <td>scroll</td>\n",
|
|
" <td>1761828270314</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>51.666668</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>1761828056433-0gz7aboz86h</td>\n",
|
|
" <td>scroll</td>\n",
|
|
" <td>1761828270328</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>50.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>1761828056433-0gz7aboz86h</td>\n",
|
|
" <td>scroll</td>\n",
|
|
" <td>1761828270336</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.166668</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" sessionId eventType ts targetEl \\\n",
|
|
"0 1761225843899-qaiwwwyj2o pageview 1761226211163 NaN \n",
|
|
"1 1761225843899-qaiwwwyj2o click 1761226218090 MAIN \n",
|
|
"2 1761225843899-qaiwwwyj2o click 1761226220890 MAIN \n",
|
|
"3 1761225843899-qaiwwwyj2o click 1761226225801 DIV \n",
|
|
"4 1761225843899-qaiwwwyj2o click 1761226229364 DIV \n",
|
|
"5 1761227236286-e7mphcvw6t pageview 1761227236426 NaN \n",
|
|
"6 1761227236286-e7mphcvw6t click 1761227239328 DIV \n",
|
|
"7 1761227236286-e7mphcvw6t click 1761227244783 A \n",
|
|
"8 1761828056433-0gz7aboz86h pageview 1761828261783 NaN \n",
|
|
"9 1761828056433-0gz7aboz86h click 1761828266484 H1 \n",
|
|
"10 1761828056433-0gz7aboz86h scroll 1761828270314 NaN \n",
|
|
"11 1761828056433-0gz7aboz86h scroll 1761828270328 NaN \n",
|
|
"12 1761828056433-0gz7aboz86h scroll 1761828270336 NaN \n",
|
|
"\n",
|
|
" targetUrl metadata_path \\\n",
|
|
"0 NaN / \n",
|
|
"1 NaN / \n",
|
|
"2 NaN / \n",
|
|
"3 NaN / \n",
|
|
"4 NaN / \n",
|
|
"5 NaN / \n",
|
|
"6 NaN / \n",
|
|
"7 https://vercel.com/new?utm_source=create-next-... / \n",
|
|
"8 NaN / \n",
|
|
"9 NaN / \n",
|
|
"10 NaN / \n",
|
|
"11 NaN / \n",
|
|
"12 NaN / \n",
|
|
"\n",
|
|
" metadata_referrer metadata_x metadata_y metadata_scrollY \n",
|
|
"0 NaN NaN NaN \n",
|
|
"1 NaN 815.0 331.0 NaN \n",
|
|
"2 NaN 1129.0 605.0 NaN \n",
|
|
"3 NaN 532.0 545.0 NaN \n",
|
|
"4 NaN 481.0 399.0 NaN \n",
|
|
"5 NaN NaN NaN \n",
|
|
"6 NaN 202.0 351.0 NaN \n",
|
|
"7 NaN 377.0 723.0 NaN \n",
|
|
"8 NaN NaN NaN \n",
|
|
"9 NaN 527.0 169.0 NaN \n",
|
|
"10 NaN NaN NaN 51.666668 \n",
|
|
"11 NaN NaN NaN 50.000000 \n",
|
|
"12 NaN NaN NaN 49.166668 "
|
|
]
|
|
},
|
|
"execution_count": 87,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Preview the first five events of every session (five is head()'s default).\n",
"df.groupby('sessionId').head(n=5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 88,
|
|
"id": "380eca5f-8304-4fb2-be32-e8bcfd312085",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['1761225843899-qaiwwwyj2o',\n",
|
|
" '1761828056433-0gz7aboz86h',\n",
|
|
" '1761227236286-e7mphcvw6t']"
|
|
]
|
|
},
|
|
"execution_count": 88,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# unique() keeps first-appearance order, so the session order is the same on every run;\n",
"# iterating a set of strings is not stable across kernel restarts.\n",
"sessions = df['sessionId'].unique().tolist()\n",
"sessions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 89,
|
|
"id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# map sessions to experiments"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 101,
|
|
"id": "050d90a4-20a9-47f5-b998-c31178a54cb3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def build_transition_prob_matrix(df: pd.DataFrame):\n",
"    \"\"\"Estimate first-order transition probabilities between consecutive event types.\n",
"\n",
"    df: frame with an 'eventType' column; rows with a missing eventType are ignored.\n",
"        Rows are used in their existing order (assumed chronological -- TODO confirm,\n",
"        nothing here sorts by 'ts').\n",
"\n",
"    Returns (P, labels): labels lists the distinct event types in first-appearance order and\n",
"    P[i, j] is the share of transitions leaving labels[i] that land on labels[j]. A state\n",
"    with no observed successor (e.g. only seen as the last event) gets an all-zero row.\n",
"    \"\"\"\n",
"    events = df.dropna(subset=['eventType'])['eventType'].tolist()\n",
"    labels = pd.Index(events).unique().tolist()\n",
"    idx = {e: i for i, e in enumerate(labels)}\n",
"\n",
"    # Count every observed (current -> next) pair.\n",
"    M = np.zeros((len(labels), len(labels)), dtype=float)\n",
"    for a, b in zip(events, events[1:]):\n",
"        M[idx[a], idx[b]] += 1\n",
"\n",
"    row_sums = M.sum(axis=1, keepdims=True)\n",
"    # out= is required together with where=: without it the rows skipped by the mask\n",
"    # (states with no outgoing transition) are left as uninitialized memory.\n",
"    P = np.divide(M, row_sums, out=np.zeros_like(M), where=row_sums > 0)  # row-normalized\n",
"    return P, labels"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 107,
|
|
"id": "e68f9004-82f5-4826-aece-e3dc6e15a18f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n",
|
|
"from graphviz import Digraph\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
"def _as_prob_df(matrix, labels=None):\n",
"    \"\"\"Return a square DataFrame with index=columns=labels.\n",
"\n",
"    matrix: square 2-D array-like of transition probabilities, or a DataFrame whose index\n",
"        and columns already hold the same labels in the same order (returned unchanged).\n",
"    labels: ordered state labels; required when matrix is not a DataFrame.\n",
"\n",
"    Raises AssertionError for non-square or misaligned input, and ValueError when labels\n",
"    are missing for a non-DataFrame matrix.\n",
"    \"\"\"\n",
"    if isinstance(matrix, pd.DataFrame):\n",
"        # Check the shape first: comparing an index and columns of different lengths makes\n",
"        # pandas raise its own error before the assertion message can be shown.\n",
"        is_square = matrix.shape[0] == matrix.shape[1]\n",
"        assert is_square and (matrix.index == matrix.columns).all(), \"Index/columns must match.\"\n",
"        return matrix\n",
"    matrix = np.asarray(matrix, dtype=float)\n",
"    # ndim guard: a 1-D input has no shape[1] and would otherwise fail with IndexError.\n",
"    assert matrix.ndim == 2 and matrix.shape[0] == matrix.shape[1], \"Matrix must be square.\"\n",
"    if labels is None:\n",
"        raise ValueError(\"labels are required when matrix is not a DataFrame\")\n",
"    assert len(labels) == matrix.shape[0], \"labels length must match matrix size.\"\n",
"    return pd.DataFrame(matrix, index=list(labels), columns=list(labels))\n",
|
|
"\n",
|
"def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n",
"    \"\"\"List (source, target, label) triples for every cell whose weight exceeds threshold.\n",
"\n",
"    Cells are visited row by row; the label is the weight formatted with round_digits decimals.\n",
"    \"\"\"\n",
"    return [\n",
"        (str(origin), str(target), f\"{weight:.{round_digits}f}\")\n",
"        for origin in P.index\n",
"        for target in P.columns\n",
"        if (weight := float(P.loc[origin, target])) > threshold\n",
"    ]\n",
|
|
"\n",
|
"def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False, round_digits=2):\n",
"    \"\"\"\n",
"    Render a transition-probability matrix as a left-to-right Graphviz digraph.\n",
"\n",
"    fname: output file stem (no extension)\n",
"    matrix: NumPy array or pandas DataFrame of transition PROBABILITIES\n",
"    ls_index: ordered labels (required if matrix is not a DataFrame)\n",
"    threshold: hide edges with weight <= threshold\n",
"    fmt: 'svg'|'png'|'pdf' etc.\n",
"    view: open after rendering\n",
"    round_digits: number of decimals shown in the edge labels\n",
"\n",
"    Returns the Digraph object after writing it to disk with render().\n",
"    \"\"\"\n",
"    P = _as_prob_df(matrix, labels=ls_index)\n",
"    edges = _df_to_edgelist(P, threshold=threshold, round_digits=round_digits)\n",
"\n",
"    g = Digraph(format=fmt)\n",
"    g.attr(rankdir=\"LR\", size=\"30\")\n",
"    g.attr(\"node\", shape=\"circle\")\n",
"\n",
"    # Declare every state up front so nodes without any visible edge still appear.\n",
"    for node in P.index:\n",
"        g.node(str(node), width=\"1\", height=\"1\")\n",
"\n",
"    for src, dst, label in edges:\n",
"        g.edge(src, dst, label=label)\n",
"\n",
"    # cleanup=True removes the intermediate DOT source file once the image is written.\n",
"    g.render(fname, view=view, cleanup=True)\n",
"    return g\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 108,
|
|
"id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"228pt\" height=\"124pt\"\n",
|
|
" viewBox=\"0.00 0.00 228.00 124.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 119.83)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-119.83 223.66,-119.83 223.66,4 -4,4\"/>\n",
|
|
"<!-- pageview -->\n",
|
|
"<g id=\"node1\" class=\"node\">\n",
|
|
"<title>pageview</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"44.58\" cy=\"-44.58\" rx=\"44.58\" ry=\"44.58\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">pageview</text>\n",
|
|
"</g>\n",
|
|
"<!-- click -->\n",
|
|
"<g id=\"node2\" class=\"node\">\n",
|
|
"<title>click</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"183.66\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
|
|
"</g>\n",
|
|
"<!-- pageview->click -->\n",
|
|
"<g id=\"edge1\" class=\"edge\">\n",
|
|
"<title>pageview->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M89.33,-44.58C104.32,-44.58 121.13,-44.58 136.31,-44.58\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"136.04,-48.08 146.04,-44.58 136.04,-41.08 136.04,-48.08\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->click -->\n",
|
|
"<g id=\"edge2\" class=\"edge\">\n",
|
|
"<title>click->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M171.43,-78.86C171.56,-89.86 175.63,-98.58 183.66,-98.58 188.68,-98.58 192.16,-95.17 194.09,-89.93\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"197.49,-90.78 195.65,-80.35 190.58,-89.66 197.49,-90.78\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
|
|
"</g>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7fd404165c70>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[[0. 1.]\n",
|
|
" [0. 1.]]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"358pt\" height=\"132pt\"\n",
|
|
" viewBox=\"0.00 0.00 358.00 132.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 128.41)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-128.41 354.16,-128.41 354.16,4 -4,4\"/>\n",
|
|
"<!-- pageview -->\n",
|
|
"<g id=\"node1\" class=\"node\">\n",
|
|
"<title>pageview</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"44.58\" cy=\"-44.58\" rx=\"44.58\" ry=\"44.58\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">pageview</text>\n",
|
|
"</g>\n",
|
|
"<!-- pageview->pageview -->\n",
|
|
"<g id=\"edge1\" class=\"edge\">\n",
|
|
"<title>pageview->pageview</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M30.86,-87.29C31.64,-98.6 36.22,-107.16 44.58,-107.16 49.94,-107.16 53.74,-103.65 55.99,-98.15\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"59.33,-99.28 57.99,-88.77 52.48,-97.82 59.33,-99.28\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-111.11\" font-family=\"Times,serif\" font-size=\"14.00\">0.2</text>\n",
|
|
"</g>\n",
|
|
"<!-- click -->\n",
|
|
"<g id=\"node2\" class=\"node\">\n",
|
|
"<title>click</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"183.66\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
|
|
"</g>\n",
|
|
"<!-- pageview->click -->\n",
|
|
"<g id=\"edge2\" class=\"edge\">\n",
|
|
"<title>pageview->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M89.33,-44.58C104.32,-44.58 121.13,-44.58 136.31,-44.58\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"136.04,-48.08 146.04,-44.58 136.04,-41.08 136.04,-48.08\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.8</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->pageview -->\n",
|
|
"<g id=\"edge3\" class=\"edge\">\n",
|
|
"<title>click->pageview</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M150.74,-29.52C143.93,-26.96 136.67,-24.68 129.66,-23.33 119.02,-21.28 107.71,-22.06 96.96,-24.24\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"96.33,-20.79 87.47,-26.6 98.02,-27.59 96.33,-20.79\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-27.28\" font-family=\"Times,serif\" font-size=\"14.00\">0.3</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->click -->\n",
|
|
"<g id=\"edge4\" class=\"edge\">\n",
|
|
"<title>click->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M171.43,-78.86C171.56,-89.86 175.63,-98.58 183.66,-98.58 188.68,-98.58 192.16,-95.17 194.09,-89.93\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"197.49,-90.78 195.65,-80.35 190.58,-89.66 197.49,-90.78\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.6</text>\n",
|
|
"</g>\n",
|
|
"<!-- scroll -->\n",
|
|
"<g id=\"node3\" class=\"node\">\n",
|
|
"<title>scroll</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"314.16\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"314.16\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">scroll</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->scroll -->\n",
|
|
"<g id=\"edge5\" class=\"edge\">\n",
|
|
"<title>click->scroll</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M220.12,-44.58C234.44,-44.58 251.18,-44.58 266.47,-44.58\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"266.31,-48.08 276.31,-44.58 266.31,-41.08 266.31,-48.08\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"248.91\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.1</text>\n",
|
|
"</g>\n",
|
|
"<!-- scroll->scroll -->\n",
|
|
"<g id=\"edge6\" class=\"edge\">\n",
|
|
"<title>scroll->scroll</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M301.93,-78.86C302.06,-89.86 306.13,-98.58 314.16,-98.58 319.18,-98.58 322.66,-95.17 324.59,-89.93\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"327.99,-90.78 326.15,-80.35 321.08,-89.66 327.99,-90.78\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"314.16\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
|
|
"</g>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7fd406e21a90>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[[0.25 0.75 0. ]\n",
|
|
" [0.28571429 0.57142857 0.14285714]\n",
|
|
" [0. 0.00826446 0.99173554]]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"228pt\" height=\"124pt\"\n",
|
|
" viewBox=\"0.00 0.00 228.00 124.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 119.83)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-119.83 223.66,-119.83 223.66,4 -4,4\"/>\n",
|
|
"<!-- pageview -->\n",
|
|
"<g id=\"node1\" class=\"node\">\n",
|
|
"<title>pageview</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"44.58\" cy=\"-44.58\" rx=\"44.58\" ry=\"44.58\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"44.58\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">pageview</text>\n",
|
|
"</g>\n",
|
|
"<!-- click -->\n",
|
|
"<g id=\"node2\" class=\"node\">\n",
|
|
"<title>click</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"183.66\" cy=\"-44.58\" rx=\"36\" ry=\"36\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-39.91\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
|
|
"</g>\n",
|
|
"<!-- pageview->click -->\n",
|
|
"<g id=\"edge1\" class=\"edge\">\n",
|
|
"<title>pageview->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M89.33,-44.58C104.32,-44.58 121.13,-44.58 136.31,-44.58\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"136.04,-48.08 146.04,-44.58 136.04,-41.08 136.04,-48.08\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"118.41\" y=\"-48.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->click -->\n",
|
|
"<g id=\"edge2\" class=\"edge\">\n",
|
|
"<title>click->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M171.43,-78.86C171.56,-89.86 175.63,-98.58 183.66,-98.58 188.68,-98.58 192.16,-95.17 194.09,-89.93\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"197.49,-90.78 195.65,-80.35 190.58,-89.66 197.49,-90.78\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"183.66\" y=\"-102.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.0</text>\n",
|
|
"</g>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7fd4041662b0>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[[0. 1.]\n",
|
|
" [0. 1.]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"def explore_session(session_id: str):\n",
"    \"\"\"Render and display the event-transition graph of one session; return its matrix.\"\"\"\n",
"    # Boolean indexing keeps only this session's rows (.where would keep every row as NaN).\n",
"    belongs_to_session = df['sessionId'] == session_id\n",
"    session_events = df[belongs_to_session]\n",
"    P, labels = build_transition_prob_matrix(session_events)\n",
"    graph = render_graph(\n",
"        f\"session_{session_id}\",\n",
"        P,\n",
"        ls_index=labels,\n",
"        threshold=0.01,\n",
"        fmt=\"svg\",\n",
"        view=False,\n",
"    )\n",
"    display(graph)\n",
"    return P\n",
"\n",
"# Show the graph and print the raw probability matrix for every known session.\n",
"for session in sessions:\n",
"    session_matrix = explore_session(session)\n",
"    print(session_matrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4d278c2d-406e-4dc0-b219-5f7b236e852b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python (PHANTOM)",
|
|
"language": "python",
|
|
"name": "phantom"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|