mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
Airflow addition (#28)
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics(vibe) * chore: redefined and connected pricers (#29)
This commit is contained in:
committed by
GitHub
parent
2a0e44ab24
commit
ad9423bf59
0
experiments/procesing/tests/__init__.py
Normal file
0
experiments/procesing/tests/__init__.py
Normal file
271
experiments/procesing/tests/conftest.py
Normal file
271
experiments/procesing/tests/conftest.py
Normal file
@@ -0,0 +1,271 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
from typing import List
|
||||
from procesing.providers.base import DataProvider
|
||||
from procesing.context import PipelineContext
|
||||
|
||||
|
||||
class MockProvider(DataProvider):
    """In-memory ``DataProvider`` for tests.

    Serves the frames handed to the constructor instead of talking to any
    backend. Every ``fetch_*`` method returns a copy, so a test can mutate
    the result without altering the data held by the provider.
    """

    def __init__(self, products_df=None, experiments_df=None, kafka_data=None):
        """Store the fixtures; anything omitted defaults to an empty container.

        Args:
            products_df: Product catalog frame, or ``None`` for an empty frame.
            experiments_df: Experiment metadata frame (filtered on its ``id``
                column), or ``None`` for an empty frame.
            kafka_data: Mapping of topic name to DataFrame, or ``None`` for
                an empty mapping.
        """
        self._products = products_df if products_df is not None else pd.DataFrame()
        self._experiments = experiments_df if experiments_df is not None else pd.DataFrame()
        self._kafka_data = kafka_data if kafka_data is not None else {}

    def fetch_products(self, store_mode: str) -> pd.DataFrame:
        """Return a copy of the product catalog; ``store_mode`` is ignored."""
        return self._products.copy()

    def fetch_experiments(self, experiment_ids: List[str]) -> pd.DataFrame:
        """Return the experiments whose ``id`` is in ``experiment_ids``.

        When no experiments are stored, an empty copy of the stored frame is
        returned so that any column schema it was built with is kept (the
        guard also avoids indexing ``'id'`` on a column-less default frame).
        """
        if self._experiments.empty:
            return self._experiments.copy()
        wanted = self._experiments['id'].isin(experiment_ids)
        return self._experiments[wanted].copy()

    def fetch_kafka_topic(self, topic: str) -> pd.DataFrame:
        """Return a copy of the frame for ``topic``; unknown topics give an empty frame."""
        return self._kafka_data.get(topic, pd.DataFrame()).copy()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_products():
    """Product catalog fixture: three hotel rooms with id, name and base price."""
    rooms = [
        ('d018efc1-25e9-4284-b276-80386e048b25', 'Junior Suite', 200.0),
        ('51266ddb-5b07-47b7-89ee-5b5cae94bb11', 'Superior Room', 150.0),
        ('2cd7f756-fc65-4ba0-ab01-74521c1fff43', 'Deluxe Room', 180.0),
    ]
    # Build column-wise so the frame is constructed from a dict of lists.
    return pd.DataFrame({
        'id': [room_id for room_id, _, _ in rooms],
        'name': [room_name for _, room_name, _ in rooms],
        'base_price': [room_price for _, _, room_price in rooms],
    })
|
||||
|
||||
|
||||
@pytest.fixture
def mock_interactions_raw_kafka():
    """Five raw Kafka interaction messages for a single browsing session.

    Each message is a dict with ``partitionID``, ``offset``, ``timestamp``
    and a ``value`` wrapping the event ``payload``. The first three events
    belong to one experiment and the last two to another.
    """
    session_id = 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472'
    experiment_a = '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35'
    experiment_b = 'bbbbcccc-dddd-eeee-ffff-000011112222'
    junior_suite = 'd018efc1-25e9-4284-b276-80386e048b25'
    superior_room = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'

    def message(offset, broker_ts, experiment_id, event_name, page, product_id, metadata, ts):
        # Payload key order is kept fixed: it determines the column order of
        # any DataFrame built from these payloads.
        return {
            'partitionID': 0,
            'offset': offset,
            'timestamp': broker_ts,
            'value': {
                'payload': {
                    'sessionId': session_id,
                    'experimentId': experiment_id,
                    'eventName': event_name,
                    'page': page,
                    'productId': product_id,
                    'metadata': metadata,
                    'storeMode': 'hotel',
                    'ts': ts,
                },
            },
        }

    return [
        message(
            203, 1764102082676, experiment_a, 'learn_more_about_item',
            '/hotel/products/d018efc1-25e9-4284-b276-80386e048b25', junior_suite,
            {'type': 'hotel', 'dateIndex': 1, 'roomType': 'Junior Suite'},
            '2025-11-25T20:21:22.674Z',
        ),
        message(
            204, 1764102086982, experiment_a, 'page_view',
            '/hotel/products', None,
            {'referrer': ''},
            '2025-11-25T20:21:26.947Z',
        ),
        message(
            205, 1764102091825, experiment_a, 'hover_over_title',
            '/hotel/products', superior_room,
            {'elementText': 'Superior Room', 'dateIndex': 1, 'dwellTime': 1200},
            '2025-11-25T20:21:31.823Z',
        ),
        message(
            206, 1764102094193, experiment_b, 'hover_over_paragraph',
            '/hotel/products', superior_room,
            {'elementText': 'price', 'dateIndex': 1, 'dwellTime': 1307},
            '2025-11-25T20:21:34.191Z',
        ),
        message(
            207, 1764102101970, experiment_b, 'hover_over_paragraph',
            '/hotel/products', junior_suite,
            {'elementText': 'price', 'dateIndex': 1, 'dwellTime': 1201},
            '2025-11-25T20:21:41.967Z',
        ),
    ]
|
||||
|
||||
|
||||
@pytest.fixture
def mock_interactions(mock_interactions_raw_kafka):
    """Interaction payloads as a DataFrame, with ``ts`` parsed into ``timestamp``."""
    payloads = []
    for message in mock_interactions_raw_kafka:
        payloads.append(message['value']['payload'])

    interactions = pd.DataFrame(payloads)
    interactions['timestamp'] = pd.to_datetime(interactions['ts'])
    return interactions
|
||||
|
||||
|
||||
@pytest.fixture
def mock_price_logs_raw_kafka():
    """Five raw Kafka price-log messages alternating between two products.

    Each message is a dict with ``partitionID``, ``offset``, ``timestamp``
    and a ``value`` wrapping the price ``payload``. All of them share one
    session, one experiment and ``storeMode`` ``'shop'``.
    """
    session_id = 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472'
    experiment_id = '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35'
    product_a = '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
    product_b = '2ddabbfc-4127-48fc-86dc-ebc4c677efa2'

    # (offset, broker timestamp in ms, product id, price, payload ts)
    entries = [
        (32, 1764104757969, product_a, 162.47, '2025-11-25T21:05:57.967Z'),
        (33, 1764104757995, product_b, 743.49, '2025-11-25T21:05:57.993Z'),
        (34, 1764104758011, product_a, 163.87, '2025-11-25T21:05:58.009Z'),
        (35, 1764104758050, product_b, 397.46, '2025-11-25T21:05:58.049Z'),
        (36, 1764104768865, product_a, 401.66, '2025-11-25T21:06:08.864Z'),
    ]

    # Payload key order is kept fixed: it determines the column order of any
    # DataFrame built from these payloads.
    return [
        {
            'partitionID': 0,
            'offset': offset,
            'timestamp': broker_ts,
            'value': {
                'payload': {
                    'productId': product_id,
                    'price': price,
                    'sessionId': session_id,
                    'experimentId': experiment_id,
                    'storeMode': 'shop',
                    'ts': ts,
                },
            },
        }
        for offset, broker_ts, product_id, price, ts in entries
    ]
|
||||
|
||||
|
||||
@pytest.fixture
def mock_price_logs(mock_price_logs_raw_kafka):
    """Price-log payloads as a DataFrame, with ``ts`` parsed into ``timestamp``."""
    # Unwrap the Kafka envelope: only the payload dicts become rows.
    payloads = []
    for message in mock_price_logs_raw_kafka:
        payloads.append(message['value']['payload'])

    price_logs = pd.DataFrame(payloads)
    price_logs['timestamp'] = pd.to_datetime(price_logs['ts'])
    return price_logs
|
||||
|
||||
|
||||
@pytest.fixture
def mock_experiments():
    """Experiment metadata fixture: two experiments, one hotel and one shop."""
    experiment_ids = [
        '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35',
        'bbbbcccc-dddd-eeee-ffff-000011112222',
    ]
    created = pd.to_datetime(['2025-11-25T20:00:00Z', '2025-11-26T10:00:00Z'])

    columns = {
        'id': experiment_ids,
        'created_at': created,
        'subject_name': ['Session A', 'Session B'],
        'xp_human_only': [True, False],
        'xp_market_mode': ['hotel', 'shop'],
        'xp_task_id': [None, None],
    }
    return pd.DataFrame(columns)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_provider(mock_products, mock_experiments, mock_interactions, mock_price_logs):
    """``MockProvider`` wired with the product, experiment and Kafka fixtures."""
    topics = {
        'user-interactions': mock_interactions,
        'price-logs': mock_price_logs,
    }
    provider = MockProvider(
        products_df=mock_products,
        experiments_df=mock_experiments,
        kafka_data=topics,
    )
    return provider
|
||||
|
||||
|
||||
@pytest.fixture
def pipeline_context(mock_provider):
    """``PipelineContext`` over the mock provider: hotel mode, 30s windows, 3 price buckets."""
    settings = {
        'store_mode': 'hotel',
        'window_size': '30s',
        'n_price_buckets': 3,
    }
    return PipelineContext(provider=mock_provider, **settings)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_provider():
    """``MockProvider`` holding no rows at all, for edge-case tests.

    Products and experiments keep their column names; both Kafka topics map
    to column-less empty frames.
    """
    product_columns = ['id', 'name', 'base_price']
    experiment_columns = [
        'id', 'created_at', 'subject_name',
        'xp_human_only', 'xp_market_mode', 'xp_task_id',
    ]
    no_products = pd.DataFrame(columns=product_columns)
    no_experiments = pd.DataFrame(columns=experiment_columns)
    no_topics = {'user-interactions': pd.DataFrame(), 'price-logs': pd.DataFrame()}

    return MockProvider(
        products_df=no_products,
        experiments_df=no_experiments,
        kafka_data=no_topics,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def empty_context(empty_provider):
    """``PipelineContext`` over the empty provider: hotel mode, 30s windows."""
    settings = {'store_mode': 'hotel', 'window_size': '30s'}
    return PipelineContext(provider=empty_provider, **settings)
|
||||
45
experiments/procesing/tests/test_augement.py
Normal file
45
experiments/procesing/tests/test_augement.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import pytest
|
||||
import random
|
||||
import pandas as pd
|
||||
from procesing.steps import (
|
||||
CreatePriceBucketsStep,
|
||||
AugmentEventNamesStep
|
||||
)
|
||||
|
||||
def test_bucketing(pipeline_context):
    """Check that ``CreatePriceBucketsStep`` adds a balanced categorical ``price_bucket``.

    Covers two cases: 100 distinct prices, which must land in three
    non-empty buckets of roughly equal size, and an empty frame, which must
    still come back with the ``price_bucket`` column.
    """
    step = CreatePriceBucketsStep(context=pipeline_context)

    # A locally seeded generator yields the same 100 distinct prices on every
    # run, so a failure can be reproduced; the global RNG is left untouched.
    rng = random.Random(42)
    df = pd.DataFrame({
        'metadata_price': rng.sample(range(10, 1000), 100)
    })
    result = step.transform(df)
    assert 'price_bucket' in result.columns
    assert isinstance(result['price_bucket'].dtype, pd.CategoricalDtype)
    # the pipeline_context fixture is configured with n_price_buckets=3
    assert result['price_bucket'].nunique() == 3

    # distribution check: no empty bucket, sizes within 10 of each other
    counts = result['price_bucket'].value_counts()
    assert all(counts > 0)
    assert counts.max() - counts.min() <= 10

    # empty input: the column must still be present on an empty result
    df = pd.DataFrame()
    result = step.transform(df)
    assert 'price_bucket' in result.columns
    assert result.empty
|
||||
|
||||
|
||||
def test_augment_names(pipeline_context):
    """Check that event names are suffixed only when product and bucket are both set.

    A row with a product id and a price bucket becomes
    ``<event>_<product>@<bucket>``; rows missing either value keep their
    original event name.
    """
    events = pd.DataFrame({
        'eventName': ['click', 'view', 'purchase'],
        'productId': ['prod_1', 'prod_2', None],
        'price_bucket': ['PB_1', None, 'PB_3'],
    })

    augmented = AugmentEventNamesStep(context=pipeline_context).transform(events)

    assert augmented['eventName'].tolist() == ['click_prod_1@PB_1', 'view', 'purchase']
|
||||
49
experiments/procesing/tests/test_demand.py
Normal file
49
experiments/procesing/tests/test_demand.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pytest
|
||||
import random
|
||||
import pandas as pd
|
||||
from procesing.steps import (
|
||||
ComputeDemandStep
|
||||
)
|
||||
|
||||
def test_compute_demand(pipeline_context):
    """Check that ``ComputeDemandStep`` scores every catalog product on balanced traffic.

    The 100 interactions are split 34/33/33 across the three products, with
    event types cycled evenly inside each product, then shuffled with a
    fixed seed. Unseeded random sampling could occasionally starve one
    product below the asserted threshold, so the input is built
    deterministically instead.
    """
    step = ComputeDemandStep(context=pipeline_context)

    product_ids = [
        'd018efc1-25e9-4284-b276-80386e048b25',
        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
        '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
    ]
    event_names = ['view', 'click', 'purchase']

    # One (product, event) pair per interaction; 34 + 33 + 33 = 100 rows.
    rows = []
    for product_id, n_rows in zip(product_ids, (34, 33, 33)):
        rows.extend(
            (product_id, event_names[k % len(event_names)]) for k in range(n_rows)
        )
    # Seeded shuffle interleaves products over time yet stays reproducible.
    random.Random(0).shuffle(rows)

    df = pd.DataFrame({
        'ts': pd.date_range(start='2023-01-01', periods=len(rows), freq='h'),
        'productId': [product_id for product_id, _ in rows],
        'eventName': [event_name for _, event_name in rows]
    })
    result = step.transform(df)
    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    assert set(result['productId']) == set(pipeline_context.products['id'])
    # every product holds about a third of the traffic, with a margin of 10
    assert all(result['demand_score'] > 100/3 - 10)
|
||||
|
||||
|
||||
def test_compute_demand_skewed(pipeline_context):
    """Check that ``ComputeDemandStep`` ranks products by their share of traffic.

    The 100 interactions are split exactly 70/20/10 across the three
    products, with event types cycled evenly inside each product, then
    shuffled with a fixed seed. Unseeded weighted sampling could let the 20%
    and 10% products tie or swap, which made the strict ordering assertion
    flaky.
    """
    step = ComputeDemandStep(context=pipeline_context)

    heavy = 'd018efc1-25e9-4284-b276-80386e048b25'
    medium = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
    light = '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
    event_names = ['view', 'click', 'purchase']

    # One (product, event) pair per interaction. Cycling the events means a
    # busier product has at least as many rows of every event type.
    rows = []
    for product_id, n_rows in ((heavy, 70), (medium, 20), (light, 10)):
        rows.extend(
            (product_id, event_names[k % len(event_names)]) for k in range(n_rows)
        )
    # Seeded shuffle interleaves products over time yet stays reproducible.
    random.Random(0).shuffle(rows)

    df = pd.DataFrame({
        'ts': pd.date_range(start='2023-01-01', periods=len(rows), freq='h'),
        'productId': [product_id for product_id, _ in rows],
        'eventName': [event_name for _, event_name in rows]
    })
    result = step.transform(df)
    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    assert set(result['productId']) == set(pipeline_context.products['id'])

    # skew check: scores must follow the 70 > 20 > 10 traffic split
    scores = result.set_index('productId')['demand_score'].to_dict()
    assert scores[heavy] > scores[medium] > scores[light]
|
||||
353
experiments/procesing/tests/test_elasticity.py
Normal file
353
experiments/procesing/tests/test_elasticity.py
Normal file
@@ -0,0 +1,353 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from procesing.steps import (
|
||||
AggregatePriceLogsStep,
|
||||
ComputeElasticityStep
|
||||
)
|
||||
|
||||
|
||||
def test_aggregate_price_logs_basic(pipeline_context):
    """Check the shape of the chunks produced by ``AggregatePriceLogsStep``.

    The result must be a non-empty list of dicts, each carrying window bounds
    and a ``price_vector`` frame with ``productId`` and ``price`` columns.
    """
    catalog = [
        'd018efc1-25e9-4284-b276-80386e048b25',
        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
        '2cd7f756-fc65-4ba0-ab01-74521c1fff43',
    ]
    n_rows = 100

    # One log every 10 seconds, cycling through the three products.
    price_logs = pd.DataFrame({
        'ts': pd.date_range(start='2023-01-01 10:00:00', periods=n_rows, freq='10s'),
        'productId': np.tile(catalog, 34)[:n_rows],
        'price': np.random.uniform(100, 200, n_rows),
    })

    chunks = AggregatePriceLogsStep(pipeline_context).transform(price_logs)

    assert isinstance(chunks, list)
    assert len(chunks) > 0
    for chunk in chunks:
        for key in ('window_start', 'window_end', 'price_vector'):
            assert key in chunk
        vector = chunk['price_vector']
        assert isinstance(vector, pd.DataFrame)
        for column in ('productId', 'price'):
            assert column in vector.columns
|
||||
|
||||
|
||||
def test_aggregate_price_logs_handles_gaps(pipeline_context):
    """Check that sparse price logs still aggregate into at least two windows."""
    first_product = 'd018efc1-25e9-4284-b276-80386e048b25'
    second_product = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'

    # (timestamp, product, price): roughly two minutes pass between the
    # second and third log.
    sparse_logs = [
        ('2023-01-01 10:00:00', first_product, 100),
        ('2023-01-01 10:00:05', first_product, 102),
        ('2023-01-01 10:02:00', second_product, 150),
        ('2023-01-01 10:02:30', second_product, 153),
    ]
    price_logs = pd.DataFrame({
        'ts': pd.to_datetime([stamp for stamp, _, _ in sparse_logs]),
        'productId': [product for _, product, _ in sparse_logs],
        'price': [price for _, _, price in sparse_logs],
    })

    chunks = AggregatePriceLogsStep(pipeline_context).transform(price_logs)

    assert isinstance(chunks, list)
    assert len(chunks) >= 2
|
||||
|
||||
|
||||
def test_compute_elasticity_with_known_relationship(pipeline_context):
    """Check elasticity for one product whose demand falls as its price rises.

    Over three consecutive 30s windows the price goes up by 10% then 20% of
    its base while demand drops by 15% then 30%. The step must report a
    negative elasticity computed from three observations.
    """
    step = ComputeElasticityStep(pipeline_context)

    product = 'd018efc1-25e9-4284-b276-80386e048b25'
    base_price = 100
    base_demand = 50

    windows = [
        ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
        ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
        ('2023-01-01 10:01:00', '2023-01-01 10:01:30'),
    ]
    demand_levels = [base_demand, base_demand * 0.85, base_demand * 0.70]
    price_levels = [base_price, base_price * 1.10, base_price * 1.20]

    def build_chunks(vector_key, value_column, values):
        # One chunk per window, holding a single-row frame for the product.
        return [
            {
                'window_start': pd.Timestamp(start),
                'window_end': pd.Timestamp(end),
                vector_key: pd.DataFrame({
                    'productId': [product],
                    value_column: [value],
                }),
            }
            for (start, end), value in zip(windows, values)
        ]

    demand_chunks = build_chunks('demand_vector', 'demand_score', demand_levels)
    price_chunks = build_chunks('price_vector', 'price', price_levels)

    result = step.transform((demand_chunks, price_chunks))

    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    for column in ('productId', 'elasticity', 'n_obs'):
        assert column in result.columns

    matches = result[result['productId'] == product]
    assert len(matches) == 1
    # negative elasticity: demand moves against price
    assert matches.iloc[0]['elasticity'] < 0
    # all three windows must have been used
    assert matches.iloc[0]['n_obs'] == 3
|
||||
|
||||
|
||||
def test_compute_elasticity_inelastic_product(pipeline_context):
    """Check elasticity for a product whose demand barely reacts to price.

    A 20% price increase is paired with a 2% demand decrease across two 30s
    windows, so the elasticity must lie strictly between -1 and 0.
    """
    step = ComputeElasticityStep(pipeline_context)

    product = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
    base_price = 150
    base_demand = 40

    windows = [
        ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
        ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
    ]
    demand_levels = [base_demand, base_demand * 0.98]
    price_levels = [base_price, base_price * 1.20]

    def build_chunks(vector_key, value_column, values):
        # One chunk per window, holding a single-row frame for the product.
        return [
            {
                'window_start': pd.Timestamp(start),
                'window_end': pd.Timestamp(end),
                vector_key: pd.DataFrame({
                    'productId': [product],
                    value_column: [value],
                }),
            }
            for (start, end), value in zip(windows, values)
        ]

    demand_chunks = build_chunks('demand_vector', 'demand_score', demand_levels)
    price_chunks = build_chunks('price_vector', 'price', price_levels)

    result = step.transform((demand_chunks, price_chunks))

    matches = result[result['productId'] == product]
    assert len(matches) == 1
    assert -1 < matches.iloc[0]['elasticity'] < 0
|
||||
|
||||
|
||||
def test_compute_elasticity_multiple_products(pipeline_context):
    """Check elasticity when three products share the same five windows.

    In every window each product's price grows and its demand shrinks
    geometrically, so all three must get a negative elasticity based on five
    observations.
    """
    step = ComputeElasticityStep(pipeline_context)

    products = [
        'd018efc1-25e9-4284-b276-80386e048b25',
        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
        '2cd7f756-fc65-4ba0-ab01-74521c1fff43',
    ]

    def demand_at(i):
        # demand decays per window at a different rate for each product
        return [50 * (0.9 ** i), 40 * (0.98 ** i), 30 * (0.85 ** i)]

    def price_at(i):
        # price grows per window at a different rate for each product
        return [100 * (1.05 ** i), 150 * (1.10 ** i), 120 * (1.08 ** i)]

    demand_chunks = []
    price_chunks = []
    for i in range(5):
        opens = pd.Timestamp('2023-01-01 10:00:00') + pd.Timedelta(f'{i*30}s')
        closes = opens + pd.Timedelta('30s')

        demand_frame = pd.DataFrame({'productId': products, 'demand_score': demand_at(i)})
        demand_chunks.append({
            'window_start': opens,
            'window_end': closes,
            'demand_vector': demand_frame,
        })

        price_frame = pd.DataFrame({'productId': products, 'price': price_at(i)})
        price_chunks.append({
            'window_start': opens,
            'window_end': closes,
            'price_vector': price_frame,
        })

    result = step.transform((demand_chunks, price_chunks))

    assert isinstance(result, pd.DataFrame)
    assert len(result) == 3
    assert set(result['productId']) == set(products)
    assert all(result['n_obs'] == 5)
    assert all(result['elasticity'] < 0)
|
||||
|
||||
|
||||
def test_compute_elasticity_insufficient_data(pipeline_context):
    """Check that a single observation yields ``n_obs == 1`` and elasticity ``0.0``."""
    step = ComputeElasticityStep(pipeline_context)

    product = 'd018efc1-25e9-4284-b276-80386e048b25'
    opens = pd.Timestamp('2023-01-01 10:00:00')
    closes = pd.Timestamp('2023-01-01 10:00:30')

    # exactly one window on each side
    demand_chunks = [{
        'window_start': opens,
        'window_end': closes,
        'demand_vector': pd.DataFrame({'productId': [product], 'demand_score': [50]}),
    }]
    price_chunks = [{
        'window_start': opens,
        'window_end': closes,
        'price_vector': pd.DataFrame({'productId': [product], 'price': [100]}),
    }]

    result = step.transform((demand_chunks, price_chunks))

    matches = result[result['productId'] == product]
    assert len(matches) == 1
    assert matches.iloc[0]['n_obs'] == 1
    assert matches.iloc[0]['elasticity'] == 0.0
|
||||
|
||||
|
||||
def test_compute_elasticity_misaligned_chunks(pipeline_context):
    """Check that demand and price windows an hour apart do not crash the step.

    The result must still be a DataFrame and every row it contains must
    report ``n_obs == 0``.
    """
    step = ComputeElasticityStep(pipeline_context)

    product = 'd018efc1-25e9-4284-b276-80386e048b25'

    demand_chunks = [{
        'window_start': pd.Timestamp('2023-01-01 10:00:00'),
        'window_end': pd.Timestamp('2023-01-01 10:00:30'),
        'demand_vector': pd.DataFrame({'productId': [product], 'demand_score': [50]}),
    }]
    # the price window sits one hour after the demand window
    price_chunks = [{
        'window_start': pd.Timestamp('2023-01-01 11:00:00'),
        'window_end': pd.Timestamp('2023-01-01 11:00:30'),
        'price_vector': pd.DataFrame({'productId': [product], 'price': [100]}),
    }]

    result = step.transform((demand_chunks, price_chunks))

    assert isinstance(result, pd.DataFrame)
    assert all(result['n_obs'] == 0)
|
||||
|
||||
|
||||
def test_elasticity_arc_method(pipeline_context):
    """Check that the ``'arc'`` elasticity method yields a negative elasticity.

    Demand drops from 100 to 80 while price rises from 100 to 110 across two
    consecutive 30s windows. ``elasticity_method`` is switched to ``'arc'``
    for this test only; the previous config state is restored in a
    ``finally`` block, so a failing assertion cannot leak the override.
    """
    config = pipeline_context.config
    # Remember the exact prior state (value or absence) rather than assuming
    # a default. NOTE(review): assumes config is a plain dict - confirm.
    had_method = 'elasticity_method' in config
    previous_method = config['elasticity_method'] if had_method else None
    config['elasticity_method'] = 'arc'

    try:
        step = ComputeElasticityStep(pipeline_context)

        product = 'd018efc1-25e9-4284-b276-80386e048b25'
        windows = [
            ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
            ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
        ]

        demand_chunks = [
            {
                'window_start': pd.Timestamp(start),
                'window_end': pd.Timestamp(end),
                'demand_vector': pd.DataFrame({
                    'productId': [product],
                    'demand_score': [demand]
                })
            }
            for (start, end), demand in zip(windows, [100, 80])
        ]
        price_chunks = [
            {
                'window_start': pd.Timestamp(start),
                'window_end': pd.Timestamp(end),
                'price_vector': pd.DataFrame({
                    'productId': [product],
                    'price': [price]
                })
            }
            for (start, end), price in zip(windows, [100, 110])
        ]

        result = step.transform((demand_chunks, price_chunks))
        product_elast = result[result['productId'] == product]
        assert len(product_elast) == 1
        assert product_elast.iloc[0]['elasticity'] < 0
    finally:
        # restore the config exactly as it was before the test
        if had_method:
            config['elasticity_method'] = previous_method
        else:
            del config['elasticity_method']
|
||||
51
experiments/procesing/tests/test_fetch.py
Normal file
51
experiments/procesing/tests/test_fetch.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
from procesing.steps import (
|
||||
FetchInteractionsStep,
|
||||
FetchPriceLogsStep,
|
||||
FetchExperimentsStep,
|
||||
)
|
||||
|
||||
|
||||
def test_fetch_interactions_data(pipeline_context):
    """Check that ``FetchInteractionsStep`` returns a frame with the expected columns."""
    interactions = FetchInteractionsStep(pipeline_context).transform(None)

    assert interactions is not None
    assert isinstance(interactions, pd.DataFrame)

    required_columns = (
        "eventName",
        "dateIndex",
        "experimentId",
        "storeMode",
        "metadata_elementText",
    )
    for column in required_columns:
        assert column in interactions.columns
|
||||
|
||||
def test_fetch_price_logs(pipeline_context):
    """Check that ``FetchPriceLogsStep`` returns priced rows within 0..9999."""
    price_logs = FetchPriceLogsStep(pipeline_context).transform(None)

    assert price_logs is not None
    assert isinstance(price_logs, pd.DataFrame)
    for column in ("price", "productId"):
        assert column in price_logs.columns

    # sanity bounds on the logged prices
    observed = price_logs['price'].to_list()
    assert min(observed) >= 0
    assert max(observed) <= 9999
|
||||
|
||||
|
||||
def test_experiments_fetching(pipeline_context):
    """Check that ``FetchExperimentsStep`` resolves both experiments seen in the interactions."""
    interactions = FetchInteractionsStep(pipeline_context).transform(None)
    assert interactions is not None

    fetch_step = FetchExperimentsStep(pipeline_context)
    found = fetch_step.transform(interactions)

    assert found is not None
    assert isinstance(found, pd.DataFrame)
    assert not found.empty
    assert 'id' in found.columns
    assert len(found) == 2
    assert '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35' in found['id'].values
|
||||
87
experiments/procesing/tests/test_pricing.py
Normal file
87
experiments/procesing/tests/test_pricing.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
|
||||
from procesing.pricers import (
|
||||
StaticPricer,
|
||||
RandomPricer,
|
||||
ElasticityBasedPricer
|
||||
)
|
||||
|
||||
|
||||
def test_static_pricer_fit_and_predict():
    """Check that ``StaticPricer`` predicts exactly the base prices it was fitted on."""
    catalog = pd.DataFrame({
        'product_id': [1, 2, 3],
        'base_price': [100.0, 150.0, 200.0],
    })

    static_pricer = StaticPricer()
    static_pricer.fit(catalog)

    # no state is supplied to predict
    quoted = static_pricer.predict(None)

    base_prices = catalog['base_price'].values
    assert all(quoted == base_prices), "Predicted prices do not match base prices"
|
||||
|
||||
|
||||
def test_random_pricer_fit_and_predict():
    """Check that ``RandomPricer`` yields one varied, in-bounds price per product."""
    lower, upper = 50.0, 250.0
    catalog = pd.DataFrame({
        'product_id': [1, 2, 3],
        'base_price': [100.0, 150.0, 200.0],
    })

    random_pricer = RandomPricer(price_min=lower, price_max=upper, seed=42)
    random_pricer.fit(catalog)
    quoted = random_pricer.predict(None)

    # bounds
    assert quoted.min() >= 50.0, "Predicted prices are below minimum bound"
    assert quoted.max() <= 250.0, "Predicted prices are above maximum bound"
    # loose variety check: not every price may be identical
    assert len(set(quoted)) > 1, "Predicted prices are not varied enough"
    assert len(quoted) == len(catalog), "Number of predicted prices does not match number of products"
|
||||
|
||||
def test_elasticity_based_pricer_fit_and_predict():
    """Check ``ElasticityBasedPricer`` bounds and its price formula under excess demand.

    With demand above the fitted mean for every product, each predicted
    price must be at least the base price and match
    ``base * (1 + 0.1 * |elasticity| * relative_demand_deviation)``
    within 1e-5.
    """
    fitted = pd.DataFrame({
        'productId': [1, 2, 3],
        'elasticity': [-1.5, -0.5, -2.0],
        'base_price': [100.0, 150.0, 200.0],
        'mean_demand': [10, 20, 15],
    })

    pricer = ElasticityBasedPricer(alpha=0.1, price_floor=50.0, price_ceil=300.0)
    pricer.fit(fitted)

    class MockStateSpace:
        """Minimal state object exposing only a ``demand`` attribute."""

        def __init__(self, demand):
            self.demand = demand

    # demand sits above the mean for all three products
    state = MockStateSpace(demand=[15, 25, 20])
    quoted = pricer.predict(state)

    assert quoted.min() >= 50.0, "Predicted prices are below minimum bound"
    assert quoted.max() <= 300.0, "Predicted prices are above maximum bound"
    assert len(quoted) == len(fitted), "Number of predicted prices does not match number of products"

    # semantic check: recompute the expected price for every product
    for i, row in fitted.iterrows():
        base_price = row['base_price']
        elasticity = row['elasticity']
        deviation = (state.demand[i] - row['mean_demand']) / row['mean_demand']
        expected_price = base_price * (1 + 0.1 * abs(elasticity) * deviation)
        assert quoted[i] >= base_price, f"Predicted price for product {row['productId']} did not increase as expected"
        assert abs(quoted[i] - expected_price) < 1e-5, f"Predicted price for product {row['productId']} does not match expected calculation within 1e-5 tolerance"
|
||||
Reference in New Issue
Block a user