test: started with pipeline step testing

This commit is contained in:
2025-11-28 16:20:17 +01:00
parent e79edf2ef3
commit b5c71e713b
6 changed files with 376 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
import pytest
import random
import pandas as pd
from procesing.steps import (
CreatePriceBucketsStep,
AugmentEventNamesStep
)
def test_bucketing(pipeline_context):
    """Check that CreatePriceBucketsStep adds a categorical ``price_bucket`` column.

    Two inputs are exercised: a frame holding 100 distinct prices and a
    completely empty frame.

    Args:
        pipeline_context: passed to the step as ``context``; presumably a
            pytest fixture defined outside this file (e.g. in conftest.py).
            The expected bucket count of 3 is assumed to come from its
            configuration -- TODO confirm against the fixture.
    """
    step = CreatePriceBucketsStep(context=pipeline_context)

    # A locally seeded generator yields the same sample on every run, so a
    # failure is reproducible, and it leaves the global ``random`` state alone.
    rng = random.Random(42)
    df = pd.DataFrame({
        'metadata_price': rng.sample(range(10, 1000), 100)
    })
    result = step.transform(df)
    assert 'price_bucket' in result.columns
    # The bucket column must be categorical.
    assert isinstance(result['price_bucket'].dtype, pd.CategoricalDtype)
    assert result['price_bucket'].nunique() == 3  # as per context config

    # Distribution check: every bucket is populated and sizes are close.
    counts = result['price_bucket'].value_counts()
    assert (counts > 0).all()
    assert counts.max() - counts.min() <= 10  # roughly equal distribution for 100 samples

    # An empty input must still produce the output column and contain no rows.
    df = pd.DataFrame()
    result = step.transform(df)
    assert 'price_bucket' in result.columns
    assert result.empty
def test_augment_names(pipeline_context):
    """Verify the event names produced by AugmentEventNamesStep.

    Of the three input rows, only the one where both ``productId`` and
    ``price_bucket`` are present is expected to be renamed; the two rows
    with a missing value are expected to keep their original event name.
    """
    events = pd.DataFrame({
        'eventName': ['click', 'view', 'purchase'],
        'productId': ['prod_1', 'prod_2', None],
        'price_bucket': ['PB_1', None, 'PB_3'],
    })

    augmented = AugmentEventNamesStep(context=pipeline_context).transform(events)

    actual_names = augmented['eventName'].tolist()
    assert actual_names == ['click_prod_1@PB_1', 'view', 'purchase']