chore: move browser-detection helper to module level so it is not redefined on every call

This commit is contained in:
2025-12-12 12:11:08 +01:00
parent b28f3206a7
commit 92c84f5419

View File

@@ -21,6 +21,13 @@ BROWSER_PATTERNS = [('Chrome', r'Chrome/[\d.]+'), ('Firefox', r'Firefox/[\d.]+')
('Safari', r'Safari/[\d.]+'), ('Edge', r'Edg/[\d.]+')] ('Safari', r'Safari/[\d.]+'), ('Edge', r'Edg/[\d.]+')]
def _get_browser(s: str) -> str:
if pd.isna(s): return 'Unknown'
for name, pat in BROWSER_PATTERNS:
if re.search(pat, s): return name
return 'Other'
class TemporalFeatureStep(BaseContextStep): class TemporalFeatureStep(BaseContextStep):
"""Vectorized time-based features: durations, velocities, gaps.""" """Vectorized time-based features: durations, velocities, gaps."""
@@ -119,13 +126,7 @@ class UserAgentFeatureStep(BaseContextStep):
ua = df.groupby('sessionId')['userAgent'].first().reset_index() ua = df.groupby('sessionId')['userAgent'].first().reset_index()
ua['is_headless'] = ua['userAgent'].str.contains(HEADLESS_RE, na=False) ua['is_headless'] = ua['userAgent'].str.contains(HEADLESS_RE, na=False)
ua['is_automation'] = ua['userAgent'].str.contains(AUTOMATION_RE, na=False) ua['is_automation'] = ua['userAgent'].str.contains(AUTOMATION_RE, na=False)
ua['browser_family'] = ua['userAgent'].apply(_get_browser)
def get_browser(s: object) -> str:
    """Map a raw user-agent value to a coarse browser family name.

    Args:
        s: The user-agent value for one row. Normally a string; missing
            values (``None``/NaN) and any other non-string value are
            tolerated rather than raising.

    Returns:
        ``'Unknown'`` when ``s`` is not a string, the name of the first
        ``BROWSER_PATTERNS`` entry whose regex matches ``s``, or
        ``'Other'`` when no pattern matches.
    """
    # A null-only guard lets non-string, non-null entries reach
    # ``re.search`` and raise TypeError; treat every non-text value as an
    # unknown browser instead.
    if not isinstance(s, str):
        return 'Unknown'
    # First match wins, so the result depends on the order of
    # BROWSER_PATTERNS.
    for name, pat in BROWSER_PATTERNS:
        if re.search(pat, s):
            return name
    return 'Other'
ua['browser_family'] = ua['userAgent'].apply(get_browser)
return ua[['sessionId', 'is_headless', 'is_automation', 'browser_family']] return ua[['sessionId', 'is_headless', 'is_automation', 'browser_family']]