Files
PHANTOM/experiments/data_export.ipynb
Daniel Alves Rösel ab8b8787a8 13 agentic behavior runner v1 (#14)
* baseline setup of agent abstract

* feat: new implementation of simple AI agent that can follow a goal and return

* refactored import structure and created full tests

* pytest setup a github workflow to run tests + more ignores

* singularity for pushing

* fixing builds of PDFs

* inital structure of docs

* init styles and docs

* basic style implementation

* 13 create outline for research paper draft (#18)

* updated outline for paper from issue

* extra paper sections and some formalization of series data

* algorithms and acknowledgements

* updated outline for paper from issue

* Refactor docker-compose services to use individual Dockerfiles (#20)

* Initial plan

* Refactor services into individual Dockerfiles

Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

* Add EXPOSE directives to all Dockerfiles with port documentation

Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

* 2 nextjs scaffold with store mode shop and admin session experiment wiring event emission v1 (#17)

* chore: cleaning gitignore

* formating and env documentation

* feat: context switching of hotel/airline depndent on env var via middleware

* fixed alignment and building

* wrong file

* prods

* fixed applying style

* better session cookie management

* tentative session storage with maybe using airtable

* migrated api of ingestion

* events and products apge

* fixing build

* 13 create outline for research paper draft (#18)

* updated outline for paper from issue

* extra paper sections and some formalization of series data

* algorithms and acknowledgements

* updated outline for paper from issue

* upadted text formating

* event unification

* refactor tracking to ues callbacks instead of refs

* implement a pricing display api with session passing

* moved middleware to proxy according to new changes in Nextjs

* refactoed kafka ingestion to go via backend not web-db

* Refactor docker-compose services to use individual Dockerfiles (#20)

* Initial plan

* Refactor services into individual Dockerfiles

Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

* Add EXPOSE directives to all Dockerfiles with port documentation

Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

* fixing small bugs and adding exepriments to tracking

* added some doc

* fixing prod

* prod kafka server logging

* topic auto create

* pytest setup a github workflow to run tests + more ignores

* getting data from agents properly

* proper pipeline to handle data and build matrices

* fixing backend dumping

* fixing agents and ignore

* fixing import for tests

---------

Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
2025-11-15 16:16:01 +01:00

958 lines
42 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "62eafcd9-5462-4063-8873-0e7fb9add907",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from kafka import KafkaConsumer\n",
"import pandas as pd\n",
"import json\n",
"import numpy as np\n",
"import os\n",
"from dotenv import load_dotenv\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display, SVG, Image\n",
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 73 entries, 0 to 72\n",
"Data columns (total 13 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 sessionId 73 non-null object \n",
" 1 eventName 73 non-null object \n",
" 2 page 73 non-null object \n",
" 3 productId 67 non-null object \n",
" 4 storeMode 73 non-null object \n",
" 5 userAgent 73 non-null object \n",
" 6 ts 73 non-null object \n",
" 7 metadata_referrer 6 non-null object \n",
" 8 metadata_roomType 45 non-null object \n",
" 9 metadata_price 45 non-null float64\n",
" 10 metadata_nights 45 non-null float64\n",
" 11 metadata_elementText 22 non-null object \n",
" 12 metadata_dwellTime 22 non-null float64\n",
"dtypes: float64(3), object(10)\n",
"memory usage: 7.5+ KB\n"
]
}
],
"source": [
"KAFKA_PORT=os.getenv(\"KAFKA_PORT\", 9092)\n",
"topic = \"user-interactions\"\n",
"consumer = KafkaConsumer(\n",
" topic, \n",
" enable_auto_commit=True,\n",
" value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n",
" auto_offset_reset='earliest', \n",
" bootstrap_servers=['localhost:9092'])\n",
"messages=consumer.poll(timeout_ms=1000,max_records=10000)\n",
"df = []\n",
"for m in messages.values():\n",
" for i in m:\n",
" df.append(i.value)\n",
"df = pd.DataFrame(df)\n",
"# explode metadata col json\n",
"df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f6819a1c-32ab-49c7-845b-5df7bf60f561",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sessionId</th>\n",
" <th>eventName</th>\n",
" <th>page</th>\n",
" <th>productId</th>\n",
" <th>storeMode</th>\n",
" <th>userAgent</th>\n",
" <th>ts</th>\n",
" <th>metadata_referrer</th>\n",
" <th>metadata_roomType</th>\n",
" <th>metadata_price</th>\n",
" <th>metadata_nights</th>\n",
" <th>metadata_elementText</th>\n",
" <th>metadata_dwellTime</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>d176d7c9-4027-4702-9e31-2a71395cdda0</td>\n",
" <td>page_view</td>\n",
" <td>/products</td>\n",
" <td>None</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...</td>\n",
" <td>2025-11-14T13:23:46.270Z</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>f0317a5d-e424-44e9-b784-c8f7291ffe31</td>\n",
" <td>page_view</td>\n",
" <td>/</td>\n",
" <td>None</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...</td>\n",
" <td>2025-11-14T13:26:00.291Z</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>f0317a5d-e424-44e9-b784-c8f7291ffe31</td>\n",
" <td>page_view</td>\n",
" <td>/products</td>\n",
" <td>None</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...</td>\n",
" <td>2025-11-14T13:26:07.769Z</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>f0317a5d-e424-44e9-b784-c8f7291ffe31</td>\n",
" <td>view_item_page</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck...</td>\n",
" <td>2025-11-14T13:26:15.010Z</td>\n",
" <td>NaN</td>\n",
" <td>Premium Room</td>\n",
" <td>269.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>238dc588-a7ab-4c0e-bccd-6abca5076c66</td>\n",
" <td>page_view</td>\n",
" <td>/products</td>\n",
" <td>None</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...</td>\n",
" <td>2025-11-14T13:27:15.457Z</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>238dc588-a7ab-4c0e-bccd-6abca5076c66</td>\n",
" <td>view_item_page</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...</td>\n",
" <td>2025-11-14T13:27:15.591Z</td>\n",
" <td>NaN</td>\n",
" <td>Premium Room</td>\n",
" <td>264.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>432</th>\n",
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
" <td>click</td>\n",
" <td>1762448192425</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>1623.0</td>\n",
" <td>493.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>238dc588-a7ab-4c0e-bccd-6abca5076c66</td>\n",
" <td>view_item_page</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...</td>\n",
" <td>2025-11-14T13:27:21.483Z</td>\n",
" <td>NaN</td>\n",
" <td>Premium Room</td>\n",
" <td>264.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>238dc588-a7ab-4c0e-bccd-6abca5076c66</td>\n",
" <td>hover_over_title</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...</td>\n",
" <td>2025-11-14T13:27:22.646Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Grand Plaza Hotel</td>\n",
" <td>1200.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>238dc588-a7ab-4c0e-bccd-6abca5076c66</td>\n",
" <td>view_item_page</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...</td>\n",
" <td>2025-11-14T13:27:25.889Z</td>\n",
" <td>NaN</td>\n",
" <td>Premium Room</td>\n",
" <td>264.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>013fc334-4045-4d5a-8739-dd0a8766a63b</td>\n",
" <td>page_view</td>\n",
" <td>/products</td>\n",
" <td>None</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...</td>\n",
" <td>2025-11-14T13:53:59.993Z</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>013fc334-4045-4d5a-8739-dd0a8766a63b</td>\n",
" <td>view_item_page</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...</td>\n",
" <td>2025-11-14T13:54:10.705Z</td>\n",
" <td>NaN</td>\n",
" <td>Premium Room</td>\n",
" <td>223.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>013fc334-4045-4d5a-8739-dd0a8766a63b</td>\n",
" <td>hover_over_title</td>\n",
" <td>/products</td>\n",
" <td>htl-0</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...</td>\n",
" <td>2025-11-14T13:54:11.771Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>416.0</td>\n",
" <td>397.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Grand Plaza Hotel</td>\n",
" <td>1200.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>013fc334-4045-4d5a-8739-dd0a8766a63b</td>\n",
" <td>view_item_page</td>\n",
" <td>/products</td>\n",
" <td>htl-1</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...</td>\n",
" <td>2025-11-14T13:54:29.772Z</td>\n",
" <td>NaN</td>\n",
" <td>Standard Room</td>\n",
" <td>267.0</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>013fc334-4045-4d5a-8739-dd0a8766a63b</td>\n",
" <td>hover_over_title</td>\n",
" <td>/products</td>\n",
" <td>htl-1</td>\n",
" <td>hotel</td>\n",
" <td>Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...</td>\n",
" <td>2025-11-14T13:54:30.833Z</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Seaside Resort</td>\n",
" <td>1200.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sessionId eventName page \\\n",
"0 d176d7c9-4027-4702-9e31-2a71395cdda0 page_view /products \n",
"1 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view / \n",
"2 f0317a5d-e424-44e9-b784-c8f7291ffe31 page_view /products \n",
"3 f0317a5d-e424-44e9-b784-c8f7291ffe31 view_item_page /products \n",
"4 238dc588-a7ab-4c0e-bccd-6abca5076c66 page_view /products \n",
"5 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n",
"6 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n",
"7 238dc588-a7ab-4c0e-bccd-6abca5076c66 hover_over_title /products \n",
"8 238dc588-a7ab-4c0e-bccd-6abca5076c66 view_item_page /products \n",
"35 013fc334-4045-4d5a-8739-dd0a8766a63b page_view /products \n",
"36 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n",
"37 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n",
"38 013fc334-4045-4d5a-8739-dd0a8766a63b view_item_page /products \n",
"39 013fc334-4045-4d5a-8739-dd0a8766a63b hover_over_title /products \n",
"\n",
" productId storeMode userAgent \\\n",
"0 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n",
"1 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n",
"2 None hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n",
"3 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Geck... \n",
"4 None hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n",
"5 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n",
"6 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n",
"7 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n",
"8 htl-0 hotel Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7... \n",
"35 None hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n",
"36 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n",
"37 htl-0 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n",
"38 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n",
"39 htl-1 hotel Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53... \n",
"\n",
" ts metadata_referrer metadata_roomType \\\n",
"0 2025-11-14T13:23:46.270Z NaN \n",
"1 2025-11-14T13:26:00.291Z NaN \n",
"2 2025-11-14T13:26:07.769Z NaN \n",
"3 2025-11-14T13:26:15.010Z NaN Premium Room \n",
"4 2025-11-14T13:27:15.457Z NaN \n",
"5 2025-11-14T13:27:15.591Z NaN Premium Room \n",
"6 2025-11-14T13:27:21.483Z NaN Premium Room \n",
"7 2025-11-14T13:27:22.646Z NaN NaN \n",
"8 2025-11-14T13:27:25.889Z NaN Premium Room \n",
"35 2025-11-14T13:53:59.993Z NaN \n",
"36 2025-11-14T13:54:10.705Z NaN Premium Room \n",
"37 2025-11-14T13:54:11.771Z NaN NaN \n",
"38 2025-11-14T13:54:29.772Z NaN Standard Room \n",
"39 2025-11-14T13:54:30.833Z NaN NaN \n",
"\n",
" metadata_price metadata_nights metadata_elementText metadata_dwellTime \n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 269.0 1.0 NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"5 264.0 2.0 NaN NaN \n",
"6 264.0 2.0 NaN NaN \n",
"7 NaN NaN Grand Plaza Hotel 1200.0 \n",
"8 264.0 2.0 NaN NaN \n",
"35 NaN NaN NaN NaN \n",
"36 223.0 3.0 NaN NaN \n",
"37 NaN NaN Grand Plaza Hotel 1200.0 \n",
"38 267.0 5.0 NaN NaN \n",
"39 NaN NaN Seaside Resort 1200.0 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby('sessionId').head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "380eca5f-8304-4fb2-be32-e8bcfd312085",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['013fc334-4045-4d5a-8739-dd0a8766a63b',\n",
" '238dc588-a7ab-4c0e-bccd-6abca5076c66',\n",
" 'd176d7c9-4027-4702-9e31-2a71395cdda0',\n",
" 'f0317a5d-e424-44e9-b784-c8f7291ffe31']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sessions = list(set(df['sessionId'])); sessions # 238dc588-a7ab-4c0e-bccd-6abca5076c66"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1",
"metadata": {},
"outputs": [],
"source": [
"# map sessions to experiments"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "050d90a4-20a9-47f5-b998-c31178a54cb3",
"metadata": {},
"outputs": [],
"source": [
"def build_transition_prob_matrix(df: pd.DataFrame):\n",
" df = df.dropna(subset=['eventName'])\n",
" events = df['eventName'].tolist()\n",
" labels = pd.Index(events).unique().tolist()\n",
" idx = {e:i for i,e in enumerate(labels)}\n",
" M = np.zeros((len(labels), len(labels)), dtype=float)\n",
" for a, b in zip(events, events[1:]):\n",
" M[idx[a], idx[b]] += 1\n",
" row_sums = M.sum(axis=1, keepdims=True)\n",
" with np.errstate(divide='ignore', invalid='ignore'):\n",
" P = np.divide(M, row_sums, where=row_sums>0) # row-normalized\n",
" return P, labels"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "e68f9004-82f5-4826-aece-e3dc6e15a18f",
"metadata": {},
"outputs": [],
"source": [
"# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n",
"from graphviz import Digraph\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"def _as_prob_df(matrix, labels=None):\n",
" \"\"\"Return a square DataFrame with index=columns=labels.\"\"\"\n",
" if isinstance(matrix, pd.DataFrame):\n",
" # Ensure square and aligned\n",
" assert (matrix.index == matrix.columns).all(), \"Index/columns must match.\"\n",
" return matrix\n",
" matrix = np.asarray(matrix, dtype=float)\n",
" assert matrix.shape[0] == matrix.shape[1], \"Matrix must be square.\"\n",
" if labels is None:\n",
" raise ValueError(\"labels are required when matrix is not a DataFrame\")\n",
" assert len(labels) == matrix.shape[0], \"labels length must match matrix size.\"\n",
" return pd.DataFrame(matrix, index=list(labels), columns=list(labels))\n",
"\n",
"def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n",
" \"\"\"Build weighted edges > threshold.\"\"\"\n",
" edges = []\n",
" for src in P.index:\n",
" for dst in P.columns:\n",
" w = float(P.loc[src, dst])\n",
" if w > threshold:\n",
" edges.append((str(src), str(dst), f\"{w:.{round_digits}f}\"))\n",
" return edges\n",
"\n",
"def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False):\n",
" \"\"\"\n",
" fname: output file stem (no extension)\n",
" matrix: NumPy array or pandas DataFrame of transition PROBABILITIES\n",
" ls_index: ordered labels (required if matrix is not a DataFrame)\n",
" threshold: hide edges with weight <= threshold\n",
" fmt: 'svg'|'png'|'pdf' etc.\n",
" view: open after rendering\n",
" \"\"\"\n",
" P = _as_prob_df(matrix, labels=ls_index)\n",
" edges = _df_to_edgelist(P, threshold=threshold)\n",
"\n",
" g = Digraph(format=fmt)\n",
" g.attr(rankdir=\"LR\", size=\"30\")\n",
" g.attr(\"node\", shape=\"circle\")\n",
"\n",
" # ensure isolated nodes appear\n",
" for node in P.index:\n",
" g.node(str(node), width=\"1\", height=\"1\")\n",
"\n",
" for src, dst, label in edges:\n",
" g.edge(src, dst, label=label)\n",
"\n",
" g.render(fname, view=view, cleanup=True)\n",
" return g\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"013fc334-4045-4d5a-8739-dd0a8766a63b\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"565pt\" height=\"354pt\"\n",
" viewBox=\"0.00 0.00 565.00 354.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 349.64)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-349.64 561.05,-349.64 561.05,4 -4,4\"/>\n",
"<!-- page_view -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>page_view</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"48.19\" cy=\"-235.83\" rx=\"48.19\" ry=\"48.19\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-231.16\" font-family=\"Times,serif\" font-size=\"14.00\">page_view</text>\n",
"</g>\n",
"<!-- view_item_page -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>view_item_page</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"232.88\" cy=\"-235.83\" rx=\"69.01\" ry=\"69.01\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"232.88\" y=\"-231.16\" font-family=\"Times,serif\" font-size=\"14.00\">view_item_page</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;view_item_page -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>page_view&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M96.71,-235.83C113.69,-235.83 133.31,-235.83 152.25,-235.83\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"152.1,-239.33 162.1,-235.83 152.1,-232.33 152.1,-239.33\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-239.78\" font-family=\"Times,serif\" font-size=\"14.00\">1.00</text>\n",
"</g>\n",
"<!-- view_item_page&#45;&gt;view_item_page -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>view_item_page&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M214.74,-302.59C217.1,-314.51 223.14,-322.84 232.88,-322.84 239.27,-322.84 244.07,-319.26 247.28,-313.42\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"250.57,-314.62 250.52,-304.02 243.95,-312.33 250.57,-314.62\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"232.88\" y=\"-326.79\" font-family=\"Times,serif\" font-size=\"14.00\">0.68</text>\n",
"</g>\n",
"<!-- hover_over_title -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>hover_over_title</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"463.22\" cy=\"-275.83\" rx=\"69.81\" ry=\"69.81\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"463.22\" y=\"-271.16\" font-family=\"Times,serif\" font-size=\"14.00\">hover_over_title</text>\n",
"</g>\n",
"<!-- view_item_page&#45;&gt;hover_over_title -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>view_item_page&#45;&gt;hover_over_title</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M300.48,-250.14C307.03,-251.43 313.58,-252.69 319.89,-253.83 340.12,-257.51 362.05,-261.1 382.5,-264.27\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"381.77,-267.7 392.19,-265.76 382.83,-260.78 381.77,-267.7\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-263.17\" font-family=\"Times,serif\" font-size=\"14.00\">0.29</text>\n",
"</g>\n",
"<!-- hover_over_paragraph -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>hover_over_paragraph</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"463.22\" cy=\"-93.83\" rx=\"93.83\" ry=\"93.83\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"463.22\" y=\"-89.16\" font-family=\"Times,serif\" font-size=\"14.00\">hover_over_paragraph</text>\n",
"</g>\n",
"<!-- view_item_page&#45;&gt;hover_over_paragraph -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>view_item_page&#45;&gt;hover_over_paragraph</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M292.09,-199.63C316.79,-184.27 346.14,-166.02 373.44,-149.04\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"375.08,-152.15 381.72,-143.89 371.38,-146.2 375.08,-152.15\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-185.68\" font-family=\"Times,serif\" font-size=\"14.00\">0.04</text>\n",
"</g>\n",
"<!-- hover_over_title&#45;&gt;view_item_page -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>hover_over_title&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M399.53,-246.73C384.12,-240.88 367.42,-235.6 351.39,-232.58 339.13,-230.28 326.03,-229.26 313.19,-229.04\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"313.51,-225.54 303.51,-229.04 313.51,-232.54 313.51,-225.54\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-236.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.00</text>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f0779e818b0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"8pt\" height=\"8pt\"\n",
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f6800fac980>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.00000000e+000 1.00000000e+000 0.00000000e+000 0.00000000e+000]\n",
" [0.00000000e+000 6.78571429e-001 2.85714286e-001 3.57142857e-002]\n",
" [0.00000000e+000 1.00000000e+000 0.00000000e+000 0.00000000e+000]\n",
" [2.05833592e-312 2.29175545e-312 4.94065646e-324 6.92110218e-310]]\n",
"238dc588-a7ab-4c0e-bccd-6abca5076c66\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"565pt\" height=\"354pt\"\n",
" viewBox=\"0.00 0.00 565.00 354.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 349.64)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-349.64 561.05,-349.64 561.05,4 -4,4\"/>\n",
"<!-- page_view -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>page_view</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"48.19\" cy=\"-109.83\" rx=\"48.19\" ry=\"48.19\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-105.16\" font-family=\"Times,serif\" font-size=\"14.00\">page_view</text>\n",
"</g>\n",
"<!-- view_item_page -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>view_item_page</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"232.88\" cy=\"-197.83\" rx=\"69.01\" ry=\"69.01\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"232.88\" y=\"-193.16\" font-family=\"Times,serif\" font-size=\"14.00\">view_item_page</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;view_item_page -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>page_view&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M92.02,-130.47C112.32,-140.25 137.13,-152.2 160.18,-163.3\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"158.39,-166.32 168.92,-167.51 161.43,-160.02 158.39,-166.32\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-157.78\" font-family=\"Times,serif\" font-size=\"14.00\">1.00</text>\n",
"</g>\n",
"<!-- view_item_page&#45;&gt;view_item_page -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>view_item_page&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M214.74,-264.59C217.1,-276.51 223.14,-284.84 232.88,-284.84 239.27,-284.84 244.07,-281.26 247.28,-275.42\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"250.57,-276.62 250.52,-266.02 243.95,-274.33 250.57,-276.62\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"232.88\" y=\"-288.79\" font-family=\"Times,serif\" font-size=\"14.00\">0.19</text>\n",
"</g>\n",
"<!-- hover_over_title -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>hover_over_title</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"463.22\" cy=\"-275.83\" rx=\"69.81\" ry=\"69.81\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"463.22\" y=\"-271.16\" font-family=\"Times,serif\" font-size=\"14.00\">hover_over_title</text>\n",
"</g>\n",
"<!-- view_item_page&#45;&gt;hover_over_title -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>view_item_page&#45;&gt;hover_over_title</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M289.6,-237.16C299.36,-242.77 309.67,-247.94 319.89,-251.83 339.45,-259.28 361.4,-264.43 382.1,-267.98\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"381.52,-271.43 391.95,-269.55 382.62,-264.52 381.52,-271.43\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-265.16\" font-family=\"Times,serif\" font-size=\"14.00\">0.38</text>\n",
"</g>\n",
"<!-- hover_over_paragraph -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>hover_over_paragraph</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"463.22\" cy=\"-93.83\" rx=\"93.83\" ry=\"93.83\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"463.22\" y=\"-89.16\" font-family=\"Times,serif\" font-size=\"14.00\">hover_over_paragraph</text>\n",
"</g>\n",
"<!-- view_item_page&#45;&gt;hover_over_paragraph -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>view_item_page&#45;&gt;hover_over_paragraph</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M300.22,-180.71C317.22,-175.46 335.24,-169.12 351.39,-161.83 358.97,-158.41 366.67,-154.57 374.29,-150.49\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"375.84,-153.63 382.92,-145.75 372.47,-147.5 375.84,-153.63\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-178.15\" font-family=\"Times,serif\" font-size=\"14.00\">0.44</text>\n",
"</g>\n",
"<!-- hover_over_title&#45;&gt;view_item_page -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>hover_over_title&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M398.52,-248.36C383.21,-242.16 366.82,-235.87 351.39,-230.58 338.42,-226.15 324.5,-221.86 310.94,-217.93\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"312.2,-214.65 301.62,-215.28 310.28,-221.39 312.2,-214.65\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-234.53\" font-family=\"Times,serif\" font-size=\"14.00\">1.00</text>\n",
"</g>\n",
"<!-- hover_over_paragraph&#45;&gt;page_view -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>hover_over_paragraph&#45;&gt;page_view</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M369.13,-95.76C310.26,-97.17 232.59,-99.41 163.87,-102.58 145.72,-103.42 125.98,-104.58 108.06,-105.73\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"107.86,-102.24 98.1,-106.38 108.31,-109.22 107.86,-102.24\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"232.88\" y=\"-106.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.14</text>\n",
"</g>\n",
"<!-- hover_over_paragraph&#45;&gt;view_item_page -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>hover_over_paragraph&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M372.68,-119.15C354.84,-125.32 336.5,-132.51 319.89,-140.58 312.9,-143.98 305.81,-147.87 298.86,-151.98\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"297.49,-148.71 290.78,-156.91 301.14,-154.69 297.49,-148.71\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"335.64\" y=\"-144.53\" font-family=\"Times,serif\" font-size=\"14.00\">0.86</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f6800f97110>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0. 1. 0. 0. ]\n",
" [0. 0.1875 0.375 0.4375 ]\n",
" [0. 1. 0. 0. ]\n",
" [0.14285714 0.85714286 0. 0. ]]\n",
"d176d7c9-4027-4702-9e31-2a71395cdda0\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"104pt\" height=\"104pt\"\n",
" viewBox=\"0.00 0.00 104.00 104.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 100.37)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-100.37 100.37,-100.37 100.37,4 -4,4\"/>\n",
"<!-- page_view -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>page_view</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"48.19\" cy=\"-48.19\" rx=\"48.19\" ry=\"48.19\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-43.51\" font-family=\"Times,serif\" font-size=\"14.00\">page_view</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f6800f97110>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.]]\n",
"f0317a5d-e424-44e9-b784-c8f7291ffe31\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"310pt\" height=\"160pt\"\n",
" viewBox=\"0.00 0.00 310.00 160.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 156.44)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-156.44 305.89,-156.44 305.89,4 -4,4\"/>\n",
"<!-- page_view -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>page_view</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"48.19\" cy=\"-69.01\" rx=\"48.19\" ry=\"48.19\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-64.33\" font-family=\"Times,serif\" font-size=\"14.00\">page_view</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;page_view -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>page_view&#45;&gt;page_view</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M33.03,-115.09C34.09,-126.6 39.14,-135.19 48.19,-135.19 53.98,-135.19 58.13,-131.66 60.65,-126.1\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"64.01,-127.11 62.98,-116.56 57.21,-125.45 64.01,-127.11\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-139.14\" font-family=\"Times,serif\" font-size=\"14.00\">0.50</text>\n",
"</g>\n",
"<!-- view_item_page -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>view_item_page</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"232.88\" cy=\"-69.01\" rx=\"69.01\" ry=\"69.01\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"232.88\" y=\"-64.33\" font-family=\"Times,serif\" font-size=\"14.00\">view_item_page</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;view_item_page -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>page_view&#45;&gt;view_item_page</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M96.71,-69.01C113.69,-69.01 133.31,-69.01 152.25,-69.01\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"152.1,-72.51 162.1,-69.01 152.1,-65.51 152.1,-72.51\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-72.96\" font-family=\"Times,serif\" font-size=\"14.00\">0.50</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f6800bf50f0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[5.0e-001 5.0e-001]\n",
" [9.9e-324 1.5e-323]]\n"
]
}
],
"source": [
"def explore_session(session_id: str):\n",
" subset = df[df['sessionId'] == session_id]\n",
" print(session_id)\n",
" P, labels = build_transition_prob_matrix(subset)\n",
" g = render_graph(f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n",
" display(g)\n",
" return P\n",
"for session in sessions:\n",
" print(explore_session(session))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (PHANTOM)",
"language": "python",
"name": "phantom"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}