Files
PHANTOM/experiments/data_export.ipynb
Daniel Alves Rösel 37b2099ee0 2 nextjs scaffold with store mode shop and admin session experiment wiring event emission v1 (#17)
* chore: cleaning gitignore

* formatting and env documentation

* feat: context switching of hotel/airline dependent on env var via middleware

* fixed alignment and building

* wrong file

* prods

* fixed applying style

* better session cookie management

* tentative session storage with maybe using airtable

* migrated api of ingestion

* events and products page

* fixing build

* 13 create outline for research paper draft (#18)

* updated outline for paper from issue

* extra paper sections and some formalization of series data

* algorithms and acknowledgements

* updated outline for paper from issue

* updated text formatting

* event unification

* refactor tracking to use callbacks instead of refs

* implement a pricing display api with session passing

* moved middleware to proxy according to new changes in Nextjs

* refactored kafka ingestion to go via backend not web-db

* Refactor docker-compose services to use individual Dockerfiles (#20)

* Initial plan

* Refactor services into individual Dockerfiles

Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

* Add EXPOSE directives to all Dockerfiles with port documentation

Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>

* fixing small bugs and adding experiments to tracking

* added some doc
2025-11-13 18:07:27 +01:00

1173 lines
46 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "62eafcd9-5462-4063-8873-0e7fb9add907",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from kafka import KafkaConsumer\n",
"import pandas as pd\n",
"import json\n",
"import numpy as np\n",
"import os\n",
"from dotenv import load_dotenv\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display, SVG, Image\n",
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 528 entries, 0 to 527\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 sessionId 528 non-null object \n",
" 1 eventType 467 non-null object \n",
" 2 ts 528 non-null object \n",
" 3 targetEl 401 non-null object \n",
" 4 eventName 61 non-null object \n",
" 5 page 61 non-null object \n",
" 6 storeMode 61 non-null object \n",
" 7 userAgent 61 non-null object \n",
" 8 productId 21 non-null object \n",
" 9 metadata_path 467 non-null object \n",
" 10 metadata_referrer 82 non-null object \n",
" 11 metadata_x 425 non-null float64\n",
" 12 metadata_y 425 non-null float64\n",
" 13 metadata_event 7 non-null object \n",
" 14 metadata_targetEl 24 non-null object \n",
" 15 metadata_roomType 5 non-null object \n",
" 16 metadata_price 5 non-null float64\n",
" 17 metadata_nights 5 non-null float64\n",
" 18 metadata_targetUrl 4 non-null object \n",
"dtypes: float64(4), object(15)\n",
"memory usage: 78.5+ KB\n"
]
}
],
"source": [
"# Pull the raw interaction events from Kafka into one flat DataFrame.\n",
"KAFKA_PORT = os.getenv(\"KAFKA_PORT\", \"9092\")\n",
"topic = \"user-interactions\"\n",
"consumer = KafkaConsumer(\n",
"    topic,\n",
"    enable_auto_commit=True,\n",
"    value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n",
"    auto_offset_reset='earliest',\n",
"    # honour KAFKA_PORT instead of a hard-coded broker port\n",
"    bootstrap_servers=[f\"localhost:{KAFKA_PORT}\"])\n",
"try:\n",
"    messages = consumer.poll(timeout_ms=1000, max_records=10000)\n",
"finally:\n",
"    consumer.close()  # always release the broker connection\n",
"# flatten the batches returned by poll() into one list of decoded payloads\n",
"records = [record.value for batch in messages.values() for record in batch]\n",
"df = pd.DataFrame(records)\n",
"# explode the nested metadata dicts into metadata_* columns;\n",
"# skipped when nothing was polled, since the column does not exist then\n",
"if \"metadata\" in df.columns:\n",
"    df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f6819a1c-32ab-49c7-845b-5df7bf60f561",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sessionId</th>\n",
" <th>eventType</th>\n",
" <th>ts</th>\n",
" <th>targetEl</th>\n",
" <th>eventName</th>\n",
" <th>page</th>\n",
" <th>storeMode</th>\n",
" <th>userAgent</th>\n",
" <th>productId</th>\n",
" <th>metadata_path</th>\n",
" <th>metadata_referrer</th>\n",
" <th>metadata_x</th>\n",
" <th>metadata_y</th>\n",
" <th>metadata_event</th>\n",
" <th>metadata_targetEl</th>\n",
" <th>metadata_roomType</th>\n",
" <th>metadata_price</th>\n",
" <th>metadata_nights</th>\n",
" <th>metadata_targetUrl</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1762434923440-66hdhq8qicd</td>\n",
" <td>pageview</td>\n",
" <td>1762434924107</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1762434923440-66hdhq8qicd</td>\n",
" <td>click</td>\n",
" <td>1762434925198</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>1098.0</td>\n",
" <td>663.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1762434923440-66hdhq8qicd</td>\n",
" <td>click</td>\n",
" <td>1762434925371</td>\n",
" <td>MAIN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>1098.0</td>\n",
" <td>663.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1762434923440-66hdhq8qicd</td>\n",
" <td>pageview</td>\n",
" <td>1762437192910</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1762434923440-66hdhq8qicd</td>\n",
" <td>pageview</td>\n",
" <td>1762437198539</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>390</th>\n",
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
" <td>click</td>\n",
" <td>1762443115648</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>245.0</td>\n",
" <td>595.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>391</th>\n",
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
" <td>click</td>\n",
" <td>1762443174606</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>475.0</td>\n",
" <td>428.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>392</th>\n",
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
" <td>click</td>\n",
" <td>1762443183406</td>\n",
" <td>INPUT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>832.0</td>\n",
" <td>219.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>393</th>\n",
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
" <td>click</td>\n",
" <td>1762443208588</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>485.0</td>\n",
" <td>155.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>394</th>\n",
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
" <td>click</td>\n",
" <td>1762443225474</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>281.0</td>\n",
" <td>281.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>407</th>\n",
" <td>1762444018243-0120z6z5u42f</td>\n",
" <td>pageview</td>\n",
" <td>1762444018256</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>408</th>\n",
" <td>1762444018243-0120z6z5u42f</td>\n",
" <td>click</td>\n",
" <td>1762445774344</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>299.0</td>\n",
" <td>214.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431</th>\n",
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
" <td>pageview</td>\n",
" <td>1762448190973</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>432</th>\n",
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
" <td>click</td>\n",
" <td>1762448192425</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>1623.0</td>\n",
" <td>493.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>433</th>\n",
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
" <td>click</td>\n",
" <td>1762448192645</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>1623.0</td>\n",
" <td>493.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>434</th>\n",
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
" <td>pageview</td>\n",
" <td>1762448205850</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>435</th>\n",
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
" <td>click</td>\n",
" <td>1762448207922</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>421.0</td>\n",
" <td>216.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>438</th>\n",
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
" <td>pageview</td>\n",
" <td>1762448283244</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>439</th>\n",
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
" <td>click</td>\n",
" <td>1762448295524</td>\n",
" <td>HTML</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>614.0</td>\n",
" <td>720.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>440</th>\n",
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
" <td>click</td>\n",
" <td>1762448342763</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>416.0</td>\n",
" <td>397.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>441</th>\n",
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
" <td>pageview</td>\n",
" <td>1762448343396</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>442</th>\n",
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
" <td>click</td>\n",
" <td>1762448829631</td>\n",
" <td>DIV</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>/</td>\n",
" <td>NaN</td>\n",
" <td>45.0</td>\n",
" <td>44.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sessionId eventType ts targetEl \\\n",
"0 1762434923440-66hdhq8qicd pageview 1762434924107 NaN \n",
"1 1762434923440-66hdhq8qicd click 1762434925198 DIV \n",
"2 1762434923440-66hdhq8qicd click 1762434925371 MAIN \n",
"3 1762434923440-66hdhq8qicd pageview 1762437192910 NaN \n",
"4 1762434923440-66hdhq8qicd pageview 1762437198539 NaN \n",
"390 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443115648 DIV \n",
"391 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443174606 DIV \n",
"392 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443183406 INPUT \n",
"393 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443208588 DIV \n",
"394 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443225474 DIV \n",
"407 1762444018243-0120z6z5u42f pageview 1762444018256 NaN \n",
"408 1762444018243-0120z6z5u42f click 1762445774344 DIV \n",
"431 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448190973 NaN \n",
"432 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192425 DIV \n",
"433 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192645 DIV \n",
"434 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448205850 NaN \n",
"435 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448207922 DIV \n",
"438 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448283244 NaN \n",
"439 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448295524 HTML \n",
"440 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448342763 DIV \n",
"441 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448343396 NaN \n",
"442 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448829631 DIV \n",
"\n",
" eventName page storeMode userAgent productId metadata_path \\\n",
"0 NaN NaN NaN NaN NaN / \n",
"1 NaN NaN NaN NaN NaN / \n",
"2 NaN NaN NaN NaN NaN / \n",
"3 NaN NaN NaN NaN NaN / \n",
"4 NaN NaN NaN NaN NaN / \n",
"390 NaN NaN NaN NaN NaN / \n",
"391 NaN NaN NaN NaN NaN / \n",
"392 NaN NaN NaN NaN NaN / \n",
"393 NaN NaN NaN NaN NaN / \n",
"394 NaN NaN NaN NaN NaN / \n",
"407 NaN NaN NaN NaN NaN / \n",
"408 NaN NaN NaN NaN NaN / \n",
"431 NaN NaN NaN NaN NaN / \n",
"432 NaN NaN NaN NaN NaN / \n",
"433 NaN NaN NaN NaN NaN / \n",
"434 NaN NaN NaN NaN NaN / \n",
"435 NaN NaN NaN NaN NaN / \n",
"438 NaN NaN NaN NaN NaN / \n",
"439 NaN NaN NaN NaN NaN / \n",
"440 NaN NaN NaN NaN NaN / \n",
"441 NaN NaN NaN NaN NaN / \n",
"442 NaN NaN NaN NaN NaN / \n",
"\n",
" metadata_referrer metadata_x metadata_y metadata_event \\\n",
"0 NaN NaN NaN \n",
"1 NaN 1098.0 663.0 NaN \n",
"2 NaN 1098.0 663.0 NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"390 NaN 245.0 595.0 NaN \n",
"391 NaN 475.0 428.0 NaN \n",
"392 NaN 832.0 219.0 NaN \n",
"393 NaN 485.0 155.0 NaN \n",
"394 NaN 281.0 281.0 NaN \n",
"407 NaN NaN NaN \n",
"408 NaN 299.0 214.0 NaN \n",
"431 NaN NaN NaN \n",
"432 NaN 1623.0 493.0 NaN \n",
"433 NaN 1623.0 493.0 NaN \n",
"434 NaN NaN NaN \n",
"435 NaN 421.0 216.0 NaN \n",
"438 NaN NaN NaN \n",
"439 NaN 614.0 720.0 NaN \n",
"440 NaN 416.0 397.0 NaN \n",
"441 NaN NaN NaN \n",
"442 NaN 45.0 44.0 NaN \n",
"\n",
" metadata_targetEl metadata_roomType metadata_price metadata_nights \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"390 NaN NaN NaN NaN \n",
"391 NaN NaN NaN NaN \n",
"392 NaN NaN NaN NaN \n",
"393 NaN NaN NaN NaN \n",
"394 NaN NaN NaN NaN \n",
"407 NaN NaN NaN NaN \n",
"408 NaN NaN NaN NaN \n",
"431 NaN NaN NaN NaN \n",
"432 NaN NaN NaN NaN \n",
"433 NaN NaN NaN NaN \n",
"434 NaN NaN NaN NaN \n",
"435 NaN NaN NaN NaN \n",
"438 NaN NaN NaN NaN \n",
"439 NaN NaN NaN NaN \n",
"440 NaN NaN NaN NaN \n",
"441 NaN NaN NaN NaN \n",
"442 NaN NaN NaN NaN \n",
"\n",
" metadata_targetUrl \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"390 NaN \n",
"391 NaN \n",
"392 NaN \n",
"393 NaN \n",
"394 NaN \n",
"407 NaN \n",
"408 NaN \n",
"431 NaN \n",
"432 NaN \n",
"433 NaN \n",
"434 NaN \n",
"435 NaN \n",
"438 NaN \n",
"439 NaN \n",
"440 NaN \n",
"441 NaN \n",
"442 NaN "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five events of every session\n",
"df.groupby('sessionId').head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "380eca5f-8304-4fb2-be32-e8bcfd312085",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['214d9fad-9b00-40c3-bd0e-7739b6acd654',\n",
" '1762444018243-0120z6z5u42f',\n",
" 'f0d40ca6-c1d3-4ecd-beb3-796adc74349d',\n",
" 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472',\n",
" '1762434923440-66hdhq8qicd']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# unique() keeps first-appearance order, so the list is stable across kernel runs\n",
"# (list(set(...)) reorders string ids from run to run)\n",
"sessions = df['sessionId'].unique().tolist()\n",
"sessions"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1",
"metadata": {},
"outputs": [],
"source": [
"# map sessions to experiments"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "050d90a4-20a9-47f5-b998-c31178a54cb3",
"metadata": {},
"outputs": [],
"source": [
"def build_transition_prob_matrix(df: pd.DataFrame):\n",
"    \"\"\"Estimate first-order transition probabilities between consecutive events.\n",
"\n",
"    Rows whose ``eventName`` is missing are dropped; the remaining values, in\n",
"    row order, are treated as one event sequence.\n",
"\n",
"    Returns:\n",
"        (P, labels): ``P[i, j]`` is the share of transitions leaving\n",
"        ``labels[i]`` that go to ``labels[j]``; ``labels`` holds the distinct\n",
"        event names in order of first appearance. A state with no outgoing\n",
"        transitions gets an all-zero row.\n",
"    \"\"\"\n",
"    df = df.dropna(subset=['eventName'])\n",
"    events = df['eventName'].tolist()\n",
"    labels = pd.Index(events).unique().tolist()\n",
"    idx = {e: i for i, e in enumerate(labels)}\n",
"    M = np.zeros((len(labels), len(labels)), dtype=float)\n",
"    for a, b in zip(events, events[1:]):\n",
"        M[idx[a], idx[b]] += 1\n",
"    row_sums = M.sum(axis=1, keepdims=True)\n",
"    # `where=` without `out=` leaves the masked-out rows uninitialised, so pass\n",
"    # a zero-filled output for states that are never left.\n",
"    P = np.divide(M, row_sums, out=np.zeros_like(M), where=row_sums > 0)  # row-normalized\n",
"    return P, labels"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e68f9004-82f5-4826-aece-e3dc6e15a18f",
"metadata": {},
"outputs": [],
"source": [
"# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n",
"from graphviz import Digraph\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"def _as_prob_df(matrix, labels=None):\n",
"    \"\"\"Normalise ``matrix`` to a square DataFrame labelled identically on both axes.\n",
"\n",
"    A DataFrame is returned unchanged once its index and columns are confirmed\n",
"    to match element-wise; anything else is converted to a float array and\n",
"    wrapped using ``labels``.\n",
"    \"\"\"\n",
"    if not isinstance(matrix, pd.DataFrame):\n",
"        values = np.asarray(matrix, dtype=float)\n",
"        n_rows = values.shape[0]\n",
"        assert n_rows == values.shape[1], \"Matrix must be square.\"\n",
"        if labels is None:\n",
"            raise ValueError(\"labels are required when matrix is not a DataFrame\")\n",
"        assert len(labels) == n_rows, \"labels length must match matrix size.\"\n",
"        names = list(labels)\n",
"        return pd.DataFrame(values, index=names, columns=names)\n",
"    # already a DataFrame: only verify that both axes carry the same labels\n",
"    axes_match = (matrix.index == matrix.columns).all()\n",
"    assert axes_match, \"Index/columns must match.\"\n",
"    return matrix\n",
"\n",
"def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n",
"    \"\"\"List ``(src, dst, label)`` for every entry strictly above ``threshold``.\n",
"\n",
"    ``label`` is the weight formatted with ``round_digits`` decimals; entries\n",
"    are visited row by row in index/column order.\n",
"    \"\"\"\n",
"    return [\n",
"        (str(src), str(dst), f\"{weight:.{round_digits}f}\")\n",
"        for src in P.index\n",
"        for dst in P.columns\n",
"        if (weight := float(P.loc[src, dst])) > threshold\n",
"    ]\n",
"\n",
"def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False):\n",
"    \"\"\"Draw a transition-probability matrix as a left-to-right Graphviz digraph.\n",
"\n",
"    Args:\n",
"        fname: output file stem (no extension)\n",
"        matrix: NumPy array or pandas DataFrame of transition probabilities\n",
"        ls_index: ordered state labels (required if matrix is not a DataFrame)\n",
"        threshold: edges with weight <= threshold are not drawn\n",
"        fmt: output format passed to Digraph, e.g. 'svg', 'png', 'pdf'\n",
"        view: passed through to Digraph.render\n",
"\n",
"    Returns:\n",
"        The Digraph that was rendered.\n",
"    \"\"\"\n",
"    probs = _as_prob_df(matrix, labels=ls_index)\n",
"    weighted_edges = _df_to_edgelist(probs, threshold=threshold)\n",
"\n",
"    graph = Digraph(format=fmt)\n",
"    graph.attr(rankdir=\"LR\", size=\"30\")\n",
"    graph.attr(\"node\", shape=\"circle\")\n",
"\n",
"    # declare every state up front so states without edges are still drawn\n",
"    for state in probs.index:\n",
"        graph.node(str(state), width=\"1\", height=\"1\")\n",
"\n",
"    for source, target, weight in weighted_edges:\n",
"        graph.edge(source, target, label=weight)\n",
"\n",
"    graph.render(fname, view=view, cleanup=True)\n",
"    return graph\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"8pt\" height=\"8pt\"\n",
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f0779e814f0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"8pt\" height=\"8pt\"\n",
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f0779e818b0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"8pt\" height=\"8pt\"\n",
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f0779e6e5f0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"624pt\" height=\"205pt\"\n",
" viewBox=\"0.00 0.00 624.00 205.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 200.8)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-200.8 620.47,-200.8 620.47,4 -4,4\"/>\n",
"<!-- page_view -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>page_view</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"48.19\" cy=\"-80.4\" rx=\"48.19\" ry=\"48.19\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-75.73\" font-family=\"Times,serif\" font-size=\"14.00\">page_view</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;page_view -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>page_view&#45;&gt;page_view</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M33.03,-126.49C34.09,-137.99 39.14,-146.59 48.19,-146.59 53.98,-146.59 58.13,-143.06 60.65,-137.5\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"64.01,-138.5 62.98,-127.96 57.21,-136.84 64.01,-138.5\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-150.54\" font-family=\"Times,serif\" font-size=\"14.00\">0.70</text>\n",
"</g>\n",
"<!-- click -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>click</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"199.87\" cy=\"-120.4\" rx=\"36\" ry=\"36\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"199.87\" y=\"-115.73\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;click -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>page_view&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M95.1,-93.17C101.57,-94.94 108.13,-96.73 114.37,-98.4 127.17,-101.83 141.07,-105.49 153.84,-108.81\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"152.69,-112.13 163.25,-111.25 154.45,-105.35 152.69,-112.13\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-110.33\" font-family=\"Times,serif\" font-size=\"14.00\">0.17</text>\n",
"</g>\n",
"<!-- product_hover -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>product_hover</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"366.77\" cy=\"-63.4\" rx=\"63.4\" ry=\"63.4\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"366.77\" y=\"-58.73\" font-family=\"Times,serif\" font-size=\"14.00\">product_hover</text>\n",
"</g>\n",
"<!-- page_view&#45;&gt;product_hover -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>page_view&#45;&gt;product_hover</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M95.82,-71.03C102.05,-69.8 108.37,-68.56 114.37,-67.4 136.35,-63.16 141.59,-60.22 163.87,-58.15 206.29,-54.22 253.93,-55.21 292.27,-57.32\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"291.64,-60.79 301.83,-57.89 292.06,-53.8 291.64,-60.79\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"199.87\" y=\"-62.1\" font-family=\"Times,serif\" font-size=\"14.00\">0.13</text>\n",
"</g>\n",
"<!-- click&#45;&gt;page_view -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>click&#45;&gt;page_view</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M174.43,-94.64C166.11,-87.58 156.27,-80.86 145.87,-77.15 133.76,-72.84 120.24,-71.39 107.24,-71.49\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"107.37,-67.98 97.51,-71.88 107.65,-74.98 107.37,-67.98\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-81.1\" font-family=\"Times,serif\" font-size=\"14.00\">0.35</text>\n",
"</g>\n",
"<!-- click&#45;&gt;click -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>click&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M186.79,-154.41C186.86,-165.54 191.22,-174.4 199.87,-174.4 205.28,-174.4 209.01,-170.94 211.06,-165.63\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"214.49,-166.35 212.7,-155.91 207.59,-165.19 214.49,-166.35\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"199.87\" y=\"-178.35\" font-family=\"Times,serif\" font-size=\"14.00\">0.41</text>\n",
"</g>\n",
"<!-- click&#45;&gt;product_hover -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>click&#45;&gt;product_hover</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M235.36,-112.71C250.82,-108.94 269.18,-104 285.37,-98.4 289.54,-96.96 293.8,-95.4 298.07,-93.76\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"299.26,-97.05 307.26,-90.11 296.68,-90.55 299.26,-97.05\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"269.62\" y=\"-111.45\" font-family=\"Times,serif\" font-size=\"14.00\">0.24</text>\n",
"</g>\n",
"<!-- product_hover&#45;&gt;click -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>product_hover&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M303.17,-64.87C286.65,-66.91 269.17,-70.62 253.87,-77.15 247.28,-79.97 240.84,-83.86 234.86,-88.16\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"232.81,-85.33 227.08,-94.24 237.11,-90.85 232.81,-85.33\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"269.62\" y=\"-81.1\" font-family=\"Times,serif\" font-size=\"14.00\">0.07</text>\n",
"</g>\n",
"<!-- product_hover&#45;&gt;product_hover -->\n",
"<g id=\"edge8\" class=\"edge\">\n",
"<title>product_hover&#45;&gt;product_hover</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M349.43,-124.81C351.5,-136.52 357.28,-144.8 366.77,-144.8 373,-144.8 377.63,-141.24 380.66,-135.48\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"383.9,-136.85 383.65,-126.25 377.24,-134.69 383.9,-136.85\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"366.77\" y=\"-148.75\" font-family=\"Times,serif\" font-size=\"14.00\">0.60</text>\n",
"</g>\n",
"<!-- product_view -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>product_view</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"557.07\" cy=\"-131.4\" rx=\"59.4\" ry=\"59.4\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"557.07\" y=\"-126.73\" font-family=\"Times,serif\" font-size=\"14.00\">product_view</text>\n",
"</g>\n",
"<!-- product_hover&#45;&gt;product_view -->\n",
"<g id=\"edge9\" class=\"edge\">\n",
"<title>product_hover&#45;&gt;product_view</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M426.83,-84.73C446.95,-91.99 469.56,-100.16 490.17,-107.6\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"488.73,-110.8 499.32,-110.91 491.11,-104.22 488.73,-110.8\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"463.92\" y=\"-107.44\" font-family=\"Times,serif\" font-size=\"14.00\">0.33</text>\n",
"</g>\n",
"<!-- product_view&#45;&gt;click -->\n",
"<g id=\"edge10\" class=\"edge\">\n",
"<title>product_view&#45;&gt;click</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M501.18,-152.88C479.43,-160.32 453.97,-167.71 430.17,-171.4 374.48,-180.04 358.23,-184.3 303.37,-171.4 281.33,-166.22 258.61,-155.55 240.18,-145.31\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"242.14,-142.4 231.73,-140.45 238.65,-148.47 242.14,-142.4\"/>\n",
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"366.77\" y=\"-183.5\" font-family=\"Times,serif\" font-size=\"14.00\">1.00</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f0779e6e190>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.69565217 0.17391304 0.13043478 0. ]\n",
" [0.35294118 0.41176471 0.23529412 0. ]\n",
" [0. 0.06666667 0.6 0.33333333]\n",
" [0. 1. 0. 0. ]]\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 13.1.2 (0)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"8pt\" height=\"8pt\"\n",
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f0779e77d40>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
}
],
"source": [
"def explore_session(session_id: str):\n",
"    \"\"\"Render the event-transition graph of one session and return its probability matrix.\"\"\"\n",
"    session_events = df.loc[df['sessionId'] == session_id]\n",
"    P, labels = build_transition_prob_matrix(session_events)\n",
"    graph = render_graph(\n",
"        f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False\n",
"    )\n",
"    display(graph)\n",
"    return P\n",
"\n",
"# one graph plus its printed matrix per recorded session\n",
"for session in sessions:\n",
"    print(explore_session(session))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d278c2d-406e-4dc0-b219-5f7b236e852b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (PHANTOM)",
"language": "python",
"name": "phantom"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}