mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
catchup: rogue scripts
This commit is contained in:
165
experiments/procesing/tests/test_session.py
Normal file
165
experiments/procesing/tests/test_session.py
Normal file
@@ -0,0 +1,165 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from procesing.steps.session import (
|
||||
TemporalFeatureStep,
|
||||
BehavioralFeatureStep,
|
||||
ProductFeatureStep,
|
||||
UserAgentFeatureStep,
|
||||
ExtractSessionFeaturesStep,
|
||||
JoinLabelsStep,
|
||||
ValidateDataStep,
|
||||
)
|
||||
|
||||
|
||||
# TemporalFeatureStep tests
|
||||
def test_temporal_empty(pipeline_context):
    """An empty frame should come back empty but still expose ``sessionId``."""
    step = TemporalFeatureStep(pipeline_context)
    features = step.transform(pd.DataFrame())
    assert 'sessionId' in features.columns
    assert features.empty
|
||||
|
||||
|
||||
def test_temporal_basic(pipeline_context, session_interactions):
    """Temporal output has the timing columns and accounts for every input row."""
    features = TemporalFeatureStep(pipeline_context).transform(session_interactions)
    available = features.columns
    for name in ('session_duration_sec', 'interaction_velocity', 'max_velocity_5min'):
        assert name in available
    # Per-session interaction counts must add up to the number of input rows.
    interaction_total = features['total_interactions'].sum()
    assert interaction_total == len(session_interactions)
|
||||
|
||||
|
||||
def test_temporal_timeout(pipeline_context):
    """A gap longer than ``timeout_sec`` should leave the session duration at 0."""
    events = pd.DataFrame({
        'sessionId': ['s1', 's1'],
        # The two timestamps are one hour apart.
        'ts': ['2025-01-01T10:00:00Z', '2025-01-01T11:00:00Z'],
    })
    step = TemporalFeatureStep(pipeline_context, timeout_sec=900)
    first_row = step.transform(events).iloc[0]
    # The one-hour gap exceeds the 900-second timeout.
    assert first_row['session_duration_sec'] == 0
|
||||
|
||||
|
||||
# BehavioralFeatureStep tests
|
||||
def test_behavioral_empty(pipeline_context):
    """An empty frame should still yield a result with a ``sessionId`` column."""
    step = BehavioralFeatureStep(pipeline_context)
    features = step.transform(pd.DataFrame())
    assert 'sessionId' in features.columns
|
||||
|
||||
|
||||
def test_behavioral_counts(pipeline_context, session_interactions):
    """Behavioural output has the count columns and accounts for every event."""
    features = BehavioralFeatureStep(pipeline_context).transform(session_interactions)
    available = features.columns
    for name in ('page_views', 'item_views', 'hover_events'):
        assert name in available
    # Per-session event totals must add up to the number of input rows.
    event_total = features['total_events'].sum()
    assert event_total == len(session_interactions)
|
||||
|
||||
|
||||
def test_behavioral_hover_prefix(pipeline_context):
    """Both ``hover_over_*`` events in one session should count as hover events."""
    events = pd.DataFrame({
        'sessionId': ['s1', 's1'],
        'eventName': ['hover_over_custom', 'hover_over_button'],
        'page': ['/products', '/products'],
    })
    step = BehavioralFeatureStep(pipeline_context)
    first_row = step.transform(events).iloc[0]
    assert first_row['hover_events'] == 2
|
||||
|
||||
|
||||
# ProductFeatureStep tests
|
||||
def test_product_empty(pipeline_context):
    """An empty frame should still yield a result with a ``sessionId`` column."""
    step = ProductFeatureStep(pipeline_context)
    features = step.transform(pd.DataFrame())
    assert 'sessionId' in features.columns
|
||||
|
||||
|
||||
def test_product_features(pipeline_context, session_interactions):
    """Product output has its columns and reports at least one viewed product."""
    features = ProductFeatureStep(pipeline_context).transform(session_interactions)
    available = features.columns
    for name in ('unique_products_viewed', 'price_range'):
        assert name in available
    viewed_total = features['unique_products_viewed'].sum()
    assert viewed_total > 0
|
||||
|
||||
|
||||
# UserAgentFeatureStep tests
|
||||
def test_ua_empty(pipeline_context):
    """An empty frame should still yield a result with a ``sessionId`` column."""
    step = UserAgentFeatureStep(pipeline_context)
    features = step.transform(pd.DataFrame())
    assert 'sessionId' in features.columns
|
||||
|
||||
|
||||
def test_ua_headless_detection(pipeline_context):
    """Only the ``HeadlessChrome`` user agent should be flagged as headless."""
    agents = pd.DataFrame({
        'sessionId': ['s1', 's2'],
        'userAgent': ['Mozilla/5.0 Chrome/120', 'HeadlessChrome/120'],
    })
    features = UserAgentFeatureStep(pipeline_context).transform(agents)
    assert 'is_headless' in features.columns
    # Key the flags by session so the asserts do not depend on row order.
    flag_by_session = {
        session: flag
        for session, flag in zip(features['sessionId'], features['is_headless'])
    }
    assert flag_by_session['s1'] == False
    assert flag_by_session['s2'] == True
|
||||
|
||||
|
||||
def test_ua_browser_family(pipeline_context):
    """Firefox and Safari agents map to their family; anything else is ``Other``."""
    agents = pd.DataFrame({
        'sessionId': ['s1', 's2', 's3'],
        'userAgent': ['Mozilla/5.0 Firefox/120', 'Safari/605.1.15', 'Unknown'],
    })
    features = UserAgentFeatureStep(pipeline_context).transform(agents)
    # Key the families by session so the asserts do not depend on row order.
    family_by_session = {
        session: family
        for session, family in zip(features['sessionId'], features['browser_family'])
    }
    assert family_by_session['s1'] == 'Firefox'
    assert family_by_session['s2'] == 'Safari'
    assert family_by_session['s3'] == 'Other'
|
||||
|
||||
|
||||
def test_ua_automation_detection(pipeline_context):
    """A Selenium user agent is flagged as automation; a plain Chrome one is not."""
    agents = pd.DataFrame({
        'sessionId': ['s1', 's2'],
        'userAgent': ['Selenium WebDriver', 'Normal Chrome/120'],
    })
    features = UserAgentFeatureStep(pipeline_context).transform(agents)
    # Key the flags by session so the asserts do not depend on row order.
    flag_by_session = {
        session: flag
        for session, flag in zip(features['sessionId'], features['is_automation'])
    }
    assert flag_by_session['s1'] == True
    assert flag_by_session['s2'] == False
|
||||
|
||||
|
||||
# ExtractSessionFeaturesStep tests
|
||||
def test_extract_empty(pipeline_context):
    """Extracting features from an empty frame should give an empty result."""
    step = ExtractSessionFeaturesStep(pipeline_context)
    extracted = step.transform(pd.DataFrame())
    assert extracted.empty
|
||||
|
||||
|
||||
def test_extract_merges_all(pipeline_context, session_interactions):
    """Extracted features should carry the duration, event, product and
    headless columns, plus ``experimentId``.
    """
    extracted = ExtractSessionFeaturesStep(pipeline_context).transform(session_interactions)
    available = extracted.columns
    required = (
        'session_duration_sec',
        'total_events',
        'unique_products_viewed',
        'is_headless',
    )
    for name in required:
        assert name in available
    assert 'experimentId' in available
|
||||
|
||||
|
||||
# JoinLabelsStep tests
|
||||
def test_join_labels_tuple_input(pipeline_context):
    """A ``(features, experiments)`` tuple is accepted, and a session whose
    experiment has ``xp_human_only=True`` should get ``is_agent == False``.
    """
    session_features = pd.DataFrame({
        'sessionId': ['s1'],
        'experimentId': ['exp1'],
        'total_events': [5],
    })
    experiment_table = pd.DataFrame({
        'id': ['exp1'],
        'xp_human_only': [True],
    })
    step = JoinLabelsStep(pipeline_context)
    labelled = step.transform((session_features, experiment_table))
    assert 'is_agent' in labelled.columns
    first_row = labelled.iloc[0]
    assert first_row['is_agent'] == False
|
||||
|
||||
|
||||
def test_join_labels_empty_experiments(pipeline_context):
    """With an empty experiments frame, ``is_agent`` should be missing (NA)."""
    session_features = pd.DataFrame({
        'sessionId': ['s1'],
        'experimentId': ['exp1'],
    })
    no_experiments = pd.DataFrame()
    step = JoinLabelsStep(pipeline_context)
    first_row = step.transform((session_features, no_experiments)).iloc[0]
    assert pd.isna(first_row['is_agent'])
|
||||
|
||||
|
||||
# ValidateDataStep tests
|
||||
def test_validate_empty(pipeline_context):
    """Validating an empty frame should cache a report with status ``'empty'``."""
    validator = ValidateDataStep(pipeline_context)
    validator.transform(pd.DataFrame())
    # The report is read back from the context cache, not from the return value.
    cached_report = pipeline_context.get_cached('validation_report')
    assert cached_report['status'] == 'empty'
|
||||
|
||||
|
||||
def test_validate_missing_cols(pipeline_context):
    """A frame without ``eventName`` should be reported invalid, with that
    column listed under ``missing_cols``.
    """
    incomplete = pd.DataFrame({
        'sessionId': ['s1'],
        'ts': ['2025-01-01'],
    })
    validator = ValidateDataStep(pipeline_context)
    validator.transform(incomplete)
    # The report is read back from the context cache, not from the return value.
    cached_report = pipeline_context.get_cached('validation_report')
    assert cached_report['status'] == 'invalid'
    assert 'eventName' in cached_report['missing_cols']
|
||||
|
||||
|
||||
def test_validate_valid(pipeline_context, session_interactions):
    """The fixture interactions should validate and report at least one session."""
    validator = ValidateDataStep(pipeline_context)
    validator.transform(session_interactions)
    # The report is read back from the context cache, not from the return value.
    cached_report = pipeline_context.get_cached('validation_report')
    assert cached_report['status'] == 'valid'
    assert cached_report['sessions'] > 0
|
||||
Reference in New Issue
Block a user