mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
* chore: cleaning gitignore * formating and env documentation * feat: context switching of hotel/airline depndent on env var via middleware * fixed alignment and building * wrong file * prods * fixed applying style * better session cookie management * tentative session storage with maybe using airtable * migrated api of ingestion * events and products apge * fixing build * 13 create outline for research paper draft (#18) * updated outline for paper from issue * extra paper sections and some formalization of series data * algorithms and acknowledgements * updated outline for paper from issue * upadted text formating * event unification * refactor tracking to ues callbacks instead of refs * implement a pricing display api with session passing * moved middleware to proxy according to new changes in Nextjs * refactoed kafka ingestion to go via backend not web-db * Refactor docker-compose services to use individual Dockerfiles (#20) * Initial plan * Refactor services into individual Dockerfiles Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com> * Add EXPOSE directives to all Dockerfiles with port documentation Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com> * fixing small bugs and adding exepriments to tracking * added some doc
1173 lines
46 KiB
Plaintext
1173 lines
46 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "62eafcd9-5462-4063-8873-0e7fb9add907",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"True"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from kafka import KafkaConsumer\n",
|
|
"import pandas as pd\n",
|
|
"import json\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from IPython.display import display, SVG, Image\n",
|
|
"load_dotenv()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "4af65cb4-e8cf-4877-b2db-13ac19f3838f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"RangeIndex: 528 entries, 0 to 527\n",
|
|
"Data columns (total 19 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 sessionId 528 non-null object \n",
|
|
" 1 eventType 467 non-null object \n",
|
|
" 2 ts 528 non-null object \n",
|
|
" 3 targetEl 401 non-null object \n",
|
|
" 4 eventName 61 non-null object \n",
|
|
" 5 page 61 non-null object \n",
|
|
" 6 storeMode 61 non-null object \n",
|
|
" 7 userAgent 61 non-null object \n",
|
|
" 8 productId 21 non-null object \n",
|
|
" 9 metadata_path 467 non-null object \n",
|
|
" 10 metadata_referrer 82 non-null object \n",
|
|
" 11 metadata_x 425 non-null float64\n",
|
|
" 12 metadata_y 425 non-null float64\n",
|
|
" 13 metadata_event 7 non-null object \n",
|
|
" 14 metadata_targetEl 24 non-null object \n",
|
|
" 15 metadata_roomType 5 non-null object \n",
|
|
" 16 metadata_price 5 non-null float64\n",
|
|
" 17 metadata_nights 5 non-null float64\n",
|
|
" 18 metadata_targetUrl 4 non-null object \n",
|
|
"dtypes: float64(4), object(15)\n",
|
|
"memory usage: 78.5+ KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"KAFKA_PORT=os.getenv(\"KAFKA_PORT\", 9092)\n",
|
|
"topic = \"user-interactions\"\n",
|
|
"consumer = KafkaConsumer(\n",
|
|
" topic, \n",
|
|
" enable_auto_commit=True,\n",
|
|
" value_deserializer=lambda x: json.loads(x.decode('utf-8')),\n",
|
|
" auto_offset_reset='earliest',\n",
|
|
" bootstrap_servers=['localhost:9092'])\n",
|
|
"messages=consumer.poll(timeout_ms=1000,max_records=10000)\n",
|
|
"df = []\n",
|
|
"for m in messages.values():\n",
|
|
" for i in m:\n",
|
|
" df.append(i.value)\n",
|
|
"df = pd.DataFrame(df)\n",
|
|
"# explode metadata col json\n",
|
|
"df = df.join(pd.json_normalize(df.pop(\"metadata\"), sep=\".\").add_prefix(\"metadata_\"))\n",
|
|
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "f6819a1c-32ab-49c7-845b-5df7bf60f561",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>sessionId</th>\n",
|
|
" <th>eventType</th>\n",
|
|
" <th>ts</th>\n",
|
|
" <th>targetEl</th>\n",
|
|
" <th>eventName</th>\n",
|
|
" <th>page</th>\n",
|
|
" <th>storeMode</th>\n",
|
|
" <th>userAgent</th>\n",
|
|
" <th>productId</th>\n",
|
|
" <th>metadata_path</th>\n",
|
|
" <th>metadata_referrer</th>\n",
|
|
" <th>metadata_x</th>\n",
|
|
" <th>metadata_y</th>\n",
|
|
" <th>metadata_event</th>\n",
|
|
" <th>metadata_targetEl</th>\n",
|
|
" <th>metadata_roomType</th>\n",
|
|
" <th>metadata_price</th>\n",
|
|
" <th>metadata_nights</th>\n",
|
|
" <th>metadata_targetUrl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1762434923440-66hdhq8qicd</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762434924107</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1762434923440-66hdhq8qicd</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762434925198</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1098.0</td>\n",
|
|
" <td>663.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1762434923440-66hdhq8qicd</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762434925371</td>\n",
|
|
" <td>MAIN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1098.0</td>\n",
|
|
" <td>663.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1762434923440-66hdhq8qicd</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762437192910</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1762434923440-66hdhq8qicd</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762437198539</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>390</th>\n",
|
|
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762443115648</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>245.0</td>\n",
|
|
" <td>595.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>391</th>\n",
|
|
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762443174606</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>475.0</td>\n",
|
|
" <td>428.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>392</th>\n",
|
|
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762443183406</td>\n",
|
|
" <td>INPUT</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>832.0</td>\n",
|
|
" <td>219.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>393</th>\n",
|
|
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762443208588</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>485.0</td>\n",
|
|
" <td>155.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>394</th>\n",
|
|
" <td>d423ce8a-77aa-4c9a-94d4-d1adddcc3472</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762443225474</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>281.0</td>\n",
|
|
" <td>281.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>407</th>\n",
|
|
" <td>1762444018243-0120z6z5u42f</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762444018256</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>408</th>\n",
|
|
" <td>1762444018243-0120z6z5u42f</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762445774344</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>299.0</td>\n",
|
|
" <td>214.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>431</th>\n",
|
|
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762448190973</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>432</th>\n",
|
|
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762448192425</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1623.0</td>\n",
|
|
" <td>493.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>433</th>\n",
|
|
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762448192645</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1623.0</td>\n",
|
|
" <td>493.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>434</th>\n",
|
|
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762448205850</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>435</th>\n",
|
|
" <td>214d9fad-9b00-40c3-bd0e-7739b6acd654</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762448207922</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>421.0</td>\n",
|
|
" <td>216.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>438</th>\n",
|
|
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762448283244</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>439</th>\n",
|
|
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762448295524</td>\n",
|
|
" <td>HTML</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>614.0</td>\n",
|
|
" <td>720.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>440</th>\n",
|
|
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762448342763</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>416.0</td>\n",
|
|
" <td>397.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>441</th>\n",
|
|
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
|
|
" <td>pageview</td>\n",
|
|
" <td>1762448343396</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td></td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>442</th>\n",
|
|
" <td>f0d40ca6-c1d3-4ecd-beb3-796adc74349d</td>\n",
|
|
" <td>click</td>\n",
|
|
" <td>1762448829631</td>\n",
|
|
" <td>DIV</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>45.0</td>\n",
|
|
" <td>44.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" sessionId eventType ts targetEl \\\n",
|
|
"0 1762434923440-66hdhq8qicd pageview 1762434924107 NaN \n",
|
|
"1 1762434923440-66hdhq8qicd click 1762434925198 DIV \n",
|
|
"2 1762434923440-66hdhq8qicd click 1762434925371 MAIN \n",
|
|
"3 1762434923440-66hdhq8qicd pageview 1762437192910 NaN \n",
|
|
"4 1762434923440-66hdhq8qicd pageview 1762437198539 NaN \n",
|
|
"390 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443115648 DIV \n",
|
|
"391 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443174606 DIV \n",
|
|
"392 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443183406 INPUT \n",
|
|
"393 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443208588 DIV \n",
|
|
"394 d423ce8a-77aa-4c9a-94d4-d1adddcc3472 click 1762443225474 DIV \n",
|
|
"407 1762444018243-0120z6z5u42f pageview 1762444018256 NaN \n",
|
|
"408 1762444018243-0120z6z5u42f click 1762445774344 DIV \n",
|
|
"431 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448190973 NaN \n",
|
|
"432 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192425 DIV \n",
|
|
"433 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448192645 DIV \n",
|
|
"434 214d9fad-9b00-40c3-bd0e-7739b6acd654 pageview 1762448205850 NaN \n",
|
|
"435 214d9fad-9b00-40c3-bd0e-7739b6acd654 click 1762448207922 DIV \n",
|
|
"438 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448283244 NaN \n",
|
|
"439 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448295524 HTML \n",
|
|
"440 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448342763 DIV \n",
|
|
"441 f0d40ca6-c1d3-4ecd-beb3-796adc74349d pageview 1762448343396 NaN \n",
|
|
"442 f0d40ca6-c1d3-4ecd-beb3-796adc74349d click 1762448829631 DIV \n",
|
|
"\n",
|
|
" eventName page storeMode userAgent productId metadata_path \\\n",
|
|
"0 NaN NaN NaN NaN NaN / \n",
|
|
"1 NaN NaN NaN NaN NaN / \n",
|
|
"2 NaN NaN NaN NaN NaN / \n",
|
|
"3 NaN NaN NaN NaN NaN / \n",
|
|
"4 NaN NaN NaN NaN NaN / \n",
|
|
"390 NaN NaN NaN NaN NaN / \n",
|
|
"391 NaN NaN NaN NaN NaN / \n",
|
|
"392 NaN NaN NaN NaN NaN / \n",
|
|
"393 NaN NaN NaN NaN NaN / \n",
|
|
"394 NaN NaN NaN NaN NaN / \n",
|
|
"407 NaN NaN NaN NaN NaN / \n",
|
|
"408 NaN NaN NaN NaN NaN / \n",
|
|
"431 NaN NaN NaN NaN NaN / \n",
|
|
"432 NaN NaN NaN NaN NaN / \n",
|
|
"433 NaN NaN NaN NaN NaN / \n",
|
|
"434 NaN NaN NaN NaN NaN / \n",
|
|
"435 NaN NaN NaN NaN NaN / \n",
|
|
"438 NaN NaN NaN NaN NaN / \n",
|
|
"439 NaN NaN NaN NaN NaN / \n",
|
|
"440 NaN NaN NaN NaN NaN / \n",
|
|
"441 NaN NaN NaN NaN NaN / \n",
|
|
"442 NaN NaN NaN NaN NaN / \n",
|
|
"\n",
|
|
" metadata_referrer metadata_x metadata_y metadata_event \\\n",
|
|
"0 NaN NaN NaN \n",
|
|
"1 NaN 1098.0 663.0 NaN \n",
|
|
"2 NaN 1098.0 663.0 NaN \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"390 NaN 245.0 595.0 NaN \n",
|
|
"391 NaN 475.0 428.0 NaN \n",
|
|
"392 NaN 832.0 219.0 NaN \n",
|
|
"393 NaN 485.0 155.0 NaN \n",
|
|
"394 NaN 281.0 281.0 NaN \n",
|
|
"407 NaN NaN NaN \n",
|
|
"408 NaN 299.0 214.0 NaN \n",
|
|
"431 NaN NaN NaN \n",
|
|
"432 NaN 1623.0 493.0 NaN \n",
|
|
"433 NaN 1623.0 493.0 NaN \n",
|
|
"434 NaN NaN NaN \n",
|
|
"435 NaN 421.0 216.0 NaN \n",
|
|
"438 NaN NaN NaN \n",
|
|
"439 NaN 614.0 720.0 NaN \n",
|
|
"440 NaN 416.0 397.0 NaN \n",
|
|
"441 NaN NaN NaN \n",
|
|
"442 NaN 45.0 44.0 NaN \n",
|
|
"\n",
|
|
" metadata_targetEl metadata_roomType metadata_price metadata_nights \\\n",
|
|
"0 NaN NaN NaN NaN \n",
|
|
"1 NaN NaN NaN NaN \n",
|
|
"2 NaN NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"390 NaN NaN NaN NaN \n",
|
|
"391 NaN NaN NaN NaN \n",
|
|
"392 NaN NaN NaN NaN \n",
|
|
"393 NaN NaN NaN NaN \n",
|
|
"394 NaN NaN NaN NaN \n",
|
|
"407 NaN NaN NaN NaN \n",
|
|
"408 NaN NaN NaN NaN \n",
|
|
"431 NaN NaN NaN NaN \n",
|
|
"432 NaN NaN NaN NaN \n",
|
|
"433 NaN NaN NaN NaN \n",
|
|
"434 NaN NaN NaN NaN \n",
|
|
"435 NaN NaN NaN NaN \n",
|
|
"438 NaN NaN NaN NaN \n",
|
|
"439 NaN NaN NaN NaN \n",
|
|
"440 NaN NaN NaN NaN \n",
|
|
"441 NaN NaN NaN NaN \n",
|
|
"442 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" metadata_targetUrl \n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"390 NaN \n",
|
|
"391 NaN \n",
|
|
"392 NaN \n",
|
|
"393 NaN \n",
|
|
"394 NaN \n",
|
|
"407 NaN \n",
|
|
"408 NaN \n",
|
|
"431 NaN \n",
|
|
"432 NaN \n",
|
|
"433 NaN \n",
|
|
"434 NaN \n",
|
|
"435 NaN \n",
|
|
"438 NaN \n",
|
|
"439 NaN \n",
|
|
"440 NaN \n",
|
|
"441 NaN \n",
|
|
"442 NaN "
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"df.groupby('sessionId').head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "380eca5f-8304-4fb2-be32-e8bcfd312085",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['214d9fad-9b00-40c3-bd0e-7739b6acd654',\n",
|
|
" '1762444018243-0120z6z5u42f',\n",
|
|
" 'f0d40ca6-c1d3-4ecd-beb3-796adc74349d',\n",
|
|
" 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472',\n",
|
|
" '1762434923440-66hdhq8qicd']"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"sessions = list(set(df['sessionId'])); sessions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "f4ae6f81-dcb8-44be-aee7-30dbc3a6bae1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# map sessions to experiments"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "050d90a4-20a9-47f5-b998-c31178a54cb3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def build_transition_prob_matrix(df: pd.DataFrame):\n",
|
|
" df = df.dropna(subset=['eventName'])\n",
|
|
" events = df['eventName'].tolist()\n",
|
|
" labels = pd.Index(events).unique().tolist()\n",
|
|
" idx = {e:i for i,e in enumerate(labels)}\n",
|
|
" M = np.zeros((len(labels), len(labels)), dtype=float)\n",
|
|
" for a, b in zip(events, events[1:]):\n",
|
|
" M[idx[a], idx[b]] += 1\n",
|
|
" row_sums = M.sum(axis=1, keepdims=True)\n",
|
|
" with np.errstate(divide='ignore', invalid='ignore'):\n",
|
|
" P = np.divide(M, row_sums, where=row_sums>0) # row-normalized\n",
|
|
" return P, labels"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "e68f9004-82f5-4826-aece-e3dc6e15a18f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# https://medium.com/data-science/time-series-data-markov-transition-matrices-7060771e362b\n",
|
|
"from graphviz import Digraph\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"def _as_prob_df(matrix, labels=None):\n",
|
|
" \"\"\"Return a square DataFrame with index=columns=labels.\"\"\"\n",
|
|
" if isinstance(matrix, pd.DataFrame):\n",
|
|
" # Ensure square and aligned\n",
|
|
" assert (matrix.index == matrix.columns).all(), \"Index/columns must match.\"\n",
|
|
" return matrix\n",
|
|
" matrix = np.asarray(matrix, dtype=float)\n",
|
|
" assert matrix.shape[0] == matrix.shape[1], \"Matrix must be square.\"\n",
|
|
" if labels is None:\n",
|
|
" raise ValueError(\"labels are required when matrix is not a DataFrame\")\n",
|
|
" assert len(labels) == matrix.shape[0], \"labels length must match matrix size.\"\n",
|
|
" return pd.DataFrame(matrix, index=list(labels), columns=list(labels))\n",
|
|
"\n",
|
|
"def _df_to_edgelist(P: pd.DataFrame, threshold=0.0, round_digits=2):\n",
|
|
" \"\"\"Build weighted edges > threshold.\"\"\"\n",
|
|
" edges = []\n",
|
|
" for src in P.index:\n",
|
|
" for dst in P.columns:\n",
|
|
" w = float(P.loc[src, dst])\n",
|
|
" if w > threshold:\n",
|
|
" edges.append((str(src), str(dst), f\"{w:.{round_digits}f}\"))\n",
|
|
" return edges\n",
|
|
"\n",
|
|
"def render_graph(fname, matrix, ls_index=None, threshold=0.0, fmt=\"svg\", view=False):\n",
|
|
" \"\"\"\n",
|
|
" fname: output file stem (no extension)\n",
|
|
" matrix: NumPy array or pandas DataFrame of transition PROBABILITIES\n",
|
|
" ls_index: ordered labels (required if matrix is not a DataFrame)\n",
|
|
" threshold: hide edges with weight <= threshold\n",
|
|
" fmt: 'svg'|'png'|'pdf' etc.\n",
|
|
" view: open after rendering\n",
|
|
" \"\"\"\n",
|
|
" P = _as_prob_df(matrix, labels=ls_index)\n",
|
|
" edges = _df_to_edgelist(P, threshold=threshold)\n",
|
|
"\n",
|
|
" g = Digraph(format=fmt)\n",
|
|
" g.attr(rankdir=\"LR\", size=\"30\")\n",
|
|
" g.attr(\"node\", shape=\"circle\")\n",
|
|
"\n",
|
|
" # ensure isolated nodes appear\n",
|
|
" for node in P.index:\n",
|
|
" g.node(str(node), width=\"1\", height=\"1\")\n",
|
|
"\n",
|
|
" for src, dst, label in edges:\n",
|
|
" g.edge(src, dst, label=label)\n",
|
|
"\n",
|
|
" g.render(fname, view=view, cleanup=True)\n",
|
|
" return g\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "e255a2c1-6454-4e5e-89f6-ef8ac51ab6cc",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"8pt\" height=\"8pt\"\n",
|
|
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7f0779e814f0>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"8pt\" height=\"8pt\"\n",
|
|
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7f0779e818b0>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"8pt\" height=\"8pt\"\n",
|
|
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7f0779e6e5f0>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"624pt\" height=\"205pt\"\n",
|
|
" viewBox=\"0.00 0.00 624.00 205.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 200.8)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-200.8 620.47,-200.8 620.47,4 -4,4\"/>\n",
|
|
"<!-- page_view -->\n",
|
|
"<g id=\"node1\" class=\"node\">\n",
|
|
"<title>page_view</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"48.19\" cy=\"-80.4\" rx=\"48.19\" ry=\"48.19\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-75.73\" font-family=\"Times,serif\" font-size=\"14.00\">page_view</text>\n",
|
|
"</g>\n",
|
|
"<!-- page_view->page_view -->\n",
|
|
"<g id=\"edge1\" class=\"edge\">\n",
|
|
"<title>page_view->page_view</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M33.03,-126.49C34.09,-137.99 39.14,-146.59 48.19,-146.59 53.98,-146.59 58.13,-143.06 60.65,-137.5\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"64.01,-138.5 62.98,-127.96 57.21,-136.84 64.01,-138.5\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"48.19\" y=\"-150.54\" font-family=\"Times,serif\" font-size=\"14.00\">0.70</text>\n",
|
|
"</g>\n",
|
|
"<!-- click -->\n",
|
|
"<g id=\"node2\" class=\"node\">\n",
|
|
"<title>click</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"199.87\" cy=\"-120.4\" rx=\"36\" ry=\"36\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"199.87\" y=\"-115.73\" font-family=\"Times,serif\" font-size=\"14.00\">click</text>\n",
|
|
"</g>\n",
|
|
"<!-- page_view->click -->\n",
|
|
"<g id=\"edge2\" class=\"edge\">\n",
|
|
"<title>page_view->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M95.1,-93.17C101.57,-94.94 108.13,-96.73 114.37,-98.4 127.17,-101.83 141.07,-105.49 153.84,-108.81\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"152.69,-112.13 163.25,-111.25 154.45,-105.35 152.69,-112.13\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-110.33\" font-family=\"Times,serif\" font-size=\"14.00\">0.17</text>\n",
|
|
"</g>\n",
|
|
"<!-- product_hover -->\n",
|
|
"<g id=\"node3\" class=\"node\">\n",
|
|
"<title>product_hover</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"366.77\" cy=\"-63.4\" rx=\"63.4\" ry=\"63.4\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"366.77\" y=\"-58.73\" font-family=\"Times,serif\" font-size=\"14.00\">product_hover</text>\n",
|
|
"</g>\n",
|
|
"<!-- page_view->product_hover -->\n",
|
|
"<g id=\"edge3\" class=\"edge\">\n",
|
|
"<title>page_view->product_hover</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M95.82,-71.03C102.05,-69.8 108.37,-68.56 114.37,-67.4 136.35,-63.16 141.59,-60.22 163.87,-58.15 206.29,-54.22 253.93,-55.21 292.27,-57.32\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"291.64,-60.79 301.83,-57.89 292.06,-53.8 291.64,-60.79\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"199.87\" y=\"-62.1\" font-family=\"Times,serif\" font-size=\"14.00\">0.13</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->page_view -->\n",
|
|
"<g id=\"edge4\" class=\"edge\">\n",
|
|
"<title>click->page_view</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M174.43,-94.64C166.11,-87.58 156.27,-80.86 145.87,-77.15 133.76,-72.84 120.24,-71.39 107.24,-71.49\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"107.37,-67.98 97.51,-71.88 107.65,-74.98 107.37,-67.98\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"130.12\" y=\"-81.1\" font-family=\"Times,serif\" font-size=\"14.00\">0.35</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->click -->\n",
|
|
"<g id=\"edge5\" class=\"edge\">\n",
|
|
"<title>click->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M186.79,-154.41C186.86,-165.54 191.22,-174.4 199.87,-174.4 205.28,-174.4 209.01,-170.94 211.06,-165.63\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"214.49,-166.35 212.7,-155.91 207.59,-165.19 214.49,-166.35\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"199.87\" y=\"-178.35\" font-family=\"Times,serif\" font-size=\"14.00\">0.41</text>\n",
|
|
"</g>\n",
|
|
"<!-- click->product_hover -->\n",
|
|
"<g id=\"edge6\" class=\"edge\">\n",
|
|
"<title>click->product_hover</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M235.36,-112.71C250.82,-108.94 269.18,-104 285.37,-98.4 289.54,-96.96 293.8,-95.4 298.07,-93.76\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"299.26,-97.05 307.26,-90.11 296.68,-90.55 299.26,-97.05\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"269.62\" y=\"-111.45\" font-family=\"Times,serif\" font-size=\"14.00\">0.24</text>\n",
|
|
"</g>\n",
|
|
"<!-- product_hover->click -->\n",
|
|
"<g id=\"edge7\" class=\"edge\">\n",
|
|
"<title>product_hover->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M303.17,-64.87C286.65,-66.91 269.17,-70.62 253.87,-77.15 247.28,-79.97 240.84,-83.86 234.86,-88.16\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"232.81,-85.33 227.08,-94.24 237.11,-90.85 232.81,-85.33\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"269.62\" y=\"-81.1\" font-family=\"Times,serif\" font-size=\"14.00\">0.07</text>\n",
|
|
"</g>\n",
|
|
"<!-- product_hover->product_hover -->\n",
|
|
"<g id=\"edge8\" class=\"edge\">\n",
|
|
"<title>product_hover->product_hover</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M349.43,-124.81C351.5,-136.52 357.28,-144.8 366.77,-144.8 373,-144.8 377.63,-141.24 380.66,-135.48\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"383.9,-136.85 383.65,-126.25 377.24,-134.69 383.9,-136.85\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"366.77\" y=\"-148.75\" font-family=\"Times,serif\" font-size=\"14.00\">0.60</text>\n",
|
|
"</g>\n",
|
|
"<!-- product_view -->\n",
|
|
"<g id=\"node4\" class=\"node\">\n",
|
|
"<title>product_view</title>\n",
|
|
"<ellipse fill=\"none\" stroke=\"black\" cx=\"557.07\" cy=\"-131.4\" rx=\"59.4\" ry=\"59.4\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"557.07\" y=\"-126.73\" font-family=\"Times,serif\" font-size=\"14.00\">product_view</text>\n",
|
|
"</g>\n",
|
|
"<!-- product_hover->product_view -->\n",
|
|
"<g id=\"edge9\" class=\"edge\">\n",
|
|
"<title>product_hover->product_view</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M426.83,-84.73C446.95,-91.99 469.56,-100.16 490.17,-107.6\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"488.73,-110.8 499.32,-110.91 491.11,-104.22 488.73,-110.8\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"463.92\" y=\"-107.44\" font-family=\"Times,serif\" font-size=\"14.00\">0.33</text>\n",
|
|
"</g>\n",
|
|
"<!-- product_view->click -->\n",
|
|
"<g id=\"edge10\" class=\"edge\">\n",
|
|
"<title>product_view->click</title>\n",
|
|
"<path fill=\"none\" stroke=\"black\" d=\"M501.18,-152.88C479.43,-160.32 453.97,-167.71 430.17,-171.4 374.48,-180.04 358.23,-184.3 303.37,-171.4 281.33,-166.22 258.61,-155.55 240.18,-145.31\"/>\n",
|
|
"<polygon fill=\"black\" stroke=\"black\" points=\"242.14,-142.4 231.73,-140.45 238.65,-148.47 242.14,-142.4\"/>\n",
|
|
"<text xml:space=\"preserve\" text-anchor=\"middle\" x=\"366.77\" y=\"-183.5\" font-family=\"Times,serif\" font-size=\"14.00\">1.00</text>\n",
|
|
"</g>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7f0779e6e190>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[[0.69565217 0.17391304 0.13043478 0. ]\n",
|
|
" [0.35294118 0.41176471 0.23529412 0. ]\n",
|
|
" [0. 0.06666667 0.6 0.33333333]\n",
|
|
" [0. 1. 0. 0. ]]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/svg+xml": [
|
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
|
|
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
|
|
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
|
|
"<!-- Generated by graphviz version 13.1.2 (0)\n",
|
|
" -->\n",
|
|
"<!-- Pages: 1 -->\n",
|
|
"<svg width=\"8pt\" height=\"8pt\"\n",
|
|
" viewBox=\"0.00 0.00 8.00 8.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
|
|
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 4)\">\n",
|
|
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-4 4,-4 4,4 -4,4\"/>\n",
|
|
"</g>\n",
|
|
"</svg>\n"
|
|
],
|
|
"text/plain": [
|
|
"<graphviz.graphs.Digraph at 0x7f0779e77d40>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def explore_session(session_id: str):\n",
|
|
" subset = df[df['sessionId'] == session_id]\n",
|
|
" P, labels = build_transition_prob_matrix(subset)\n",
|
|
" g = render_graph(f\"session_{session_id}\", P, ls_index=labels, threshold=0.01, fmt=\"svg\", view=False)\n",
|
|
" display(g)\n",
|
|
" return P\n",
|
|
"for session in sessions:\n",
|
|
" print(explore_session(session))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4d278c2d-406e-4dc0-b219-5f7b236e852b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python (PHANTOM)",
|
|
"language": "python",
|
|
"name": "phantom"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|