import pytest
import random

import pandas as pd

# NOTE(review): "procesing" may be a misspelling of "processing" — the import
# path is left untouched here; confirm against the real package name.
from procesing.steps import (
    ComputeDemandStep
)

# Product ids used to build synthetic interaction events. Both tests assert
# that the step's output covers exactly ``pipeline_context.products['id']``.
_PRODUCT_IDS = [
    'd018efc1-25e9-4284-b276-80386e048b25',
    '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
    '2cd7f756-fc65-4ba0-ab01-74521c1fff43',
]
_EVENT_NAMES = ['view', 'click', 'purchase']
_N_EVENTS = 100


def _make_interactions(weights=None):
    """Build a synthetic interaction frame of ``_N_EVENTS`` hourly events.

    Args:
        weights: Optional relative sampling weights for ``_PRODUCT_IDS``.
            ``None`` samples the products uniformly.

    Returns:
        A DataFrame with columns ``ts``, ``productId`` and ``eventName``.

    The caller is responsible for seeding ``random`` beforehand; product ids
    are drawn before event names, so the draw order is part of the fixture.
    """
    return pd.DataFrame({
        'ts': pd.date_range(start='2023-01-01', periods=_N_EVENTS, freq='h'),
        'productId': random.choices(_PRODUCT_IDS, weights=weights, k=_N_EVENTS),
        'eventName': random.choices(_EVENT_NAMES, k=_N_EVENTS),
    })


def _assert_valid_result(result, pipeline_context):
    """Check the structural expectations shared by both demand tests."""
    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    assert set(result['productId']) == set(pipeline_context.products['id'])


def test_compute_demand(pipeline_context):
    """Uniformly sampled interactions give every product a score above the floor."""
    random.seed(42)  # deterministic test
    step = ComputeDemandStep(context=pipeline_context)

    # Test with normal (uniformly distributed) interaction data
    df = _make_interactions()

    result = step.transform(df)

    _assert_valid_result(result, pipeline_context)
    # Floor is 100/3 - 10; presumably an even three-way share of 100 minus a
    # tolerance of 10 — TODO confirm against ComputeDemandStep's scoring.
    assert (result['demand_score'] > 100 / 3 - 10).all()


def test_compute_demand_skewed(pipeline_context):
    """Interactions skewed 0.7/0.2/0.1 must yield scores in the same order."""
    random.seed(42)  # deterministic test
    step = ComputeDemandStep(context=pipeline_context)

    # Test with skewed interaction data: the first product dominates
    df = _make_interactions(weights=[0.7, 0.2, 0.1])

    result = step.transform(df)

    _assert_valid_result(result, pipeline_context)

    # test for skewness: scores must be strictly decreasing in weight order
    scores = result.set_index('productId')['demand_score'].to_dict()
    most_popular, middle, least_popular = _PRODUCT_IDS
    assert (
        scores[most_popular]
        > scores[middle]
        > scores[least_popular]
    )