mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (braking * exporting all * local pipeline excution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests wit hsemantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experiemntal sessin pricer and metrics(vibe) * chore: redefined and connected pricers (#29)
354 lines
12 KiB
Python
354 lines
12 KiB
Python
import pytest
|
|
import pandas as pd
|
|
import numpy as np
|
|
from procesing.steps import (
|
|
AggregatePriceLogsStep,
|
|
ComputeElasticityStep
|
|
)
|
|
|
|
|
|
def test_aggregate_price_logs_basic(pipeline_context):
    """Check that price logs aggregate into well-formed window chunks.

    Feeds 100 price rows (one every 10 seconds, cycling over three product
    ids) through ``AggregatePriceLogsStep.transform`` and asserts that the
    result is a non-empty list whose items each expose ``window_start``,
    ``window_end`` and a ``price_vector`` DataFrame containing ``productId``
    and ``price`` columns.
    """
    step = AggregatePriceLogsStep(pipeline_context)

    product_ids = [
        'd018efc1-25e9-4284-b276-80386e048b25',
        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
        '2cd7f756-fc65-4ba0-ab01-74521c1fff43',
    ]
    n_rows = 100

    # Seeded, local generator: the input is identical on every run (so a
    # failure can be reproduced) and the global NumPy random state that other
    # tests may rely on is left untouched.
    rng = np.random.default_rng(42)

    # Three ids tiled 34 times yields 102 labels; trim to exactly n_rows.
    df = pd.DataFrame({
        'ts': pd.date_range(
            start='2023-01-01 10:00:00', periods=n_rows, freq='10s'
        ),
        'productId': np.tile(product_ids, 34)[:n_rows],
        'price': rng.uniform(100, 200, n_rows),
    })

    result = step.transform(df)
    assert isinstance(result, list)
    assert len(result) > 0

    # Each chunk should have window metadata and a price vector.
    for chunk in result:
        assert 'window_start' in chunk
        assert 'window_end' in chunk
        assert 'price_vector' in chunk
        assert isinstance(chunk['price_vector'], pd.DataFrame)
        assert 'productId' in chunk['price_vector'].columns
        assert 'price' in chunk['price_vector'].columns
|
|
|
|
|
|
def test_aggregate_price_logs_handles_gaps(pipeline_context):
    """Sparse price logs with a ~2 minute gap still yield several windows.

    Intended to cover the forward-filling of missing windows; the assertions
    themselves only require a list with at least two chunks.
    """
    first_product = 'd018efc1-25e9-4284-b276-80386e048b25'
    second_product = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'

    aggregator = AggregatePriceLogsStep(pipeline_context)

    # Two observations close together, then two more after a gap of ~2 mins.
    observed_at = pd.to_datetime([
        '2023-01-01 10:00:00',
        '2023-01-01 10:00:05',
        '2023-01-01 10:02:00',
        '2023-01-01 10:02:30',
    ])
    sparse_logs = pd.DataFrame({
        'ts': observed_at,
        'productId': [
            first_product,
            first_product,
            second_product,
            second_product,
        ],
        'price': [100, 102, 150, 153],
    })

    chunks = aggregator.transform(sparse_logs)

    assert isinstance(chunks, list)
    # Multiple windows are expected despite the gap in the raw data.
    assert len(chunks) >= 2
|
|
|
|
|
|
def test_compute_elasticity_with_known_relationship(pipeline_context):
    """Elasticity is negative when demand falls while price rises.

    One product is observed over three consecutive 30-second windows: price
    goes 100 -> 110 -> 120 while demand goes 50 -> 42.5 -> 35. The result must
    be a non-empty DataFrame with ``productId``, ``elasticity`` and ``n_obs``
    columns, holding exactly one row for the product with a negative
    elasticity and ``n_obs == 3``.
    """
    step = ComputeElasticityStep(pipeline_context)

    product = 'd018efc1-25e9-4284-b276-80386e048b25'
    base_price = 100
    base_demand = 50

    # (window_start, window_end) pairs shared by the demand and price series.
    windows = [
        ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
        ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
        ('2023-01-01 10:01:00', '2023-01-01 10:01:30'),
    ]
    # Simulated elastic demand: price +10% pairs with demand -15%,
    # then price +20% pairs with demand -30%.
    demand_levels = [base_demand, base_demand * 0.85, base_demand * 0.70]
    price_levels = [base_price, base_price * 1.10, base_price * 1.20]

    demand_chunks = [
        {
            'window_start': pd.Timestamp(start),
            'window_end': pd.Timestamp(end),
            'demand_vector': pd.DataFrame({
                'productId': [product],
                'demand_score': [level],
            }),
        }
        for (start, end), level in zip(windows, demand_levels)
    ]
    price_chunks = [
        {
            'window_start': pd.Timestamp(start),
            'window_end': pd.Timestamp(end),
            'price_vector': pd.DataFrame({
                'productId': [product],
                'price': [level],
            }),
        }
        for (start, end), level in zip(windows, price_levels)
    ]

    result = step.transform((demand_chunks, price_chunks))

    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    for column in ('productId', 'elasticity', 'n_obs'):
        assert column in result.columns

    # Elasticity must be negative (normal good).
    product_elast = result[result['productId'] == product]
    assert len(product_elast) == 1
    row = product_elast.iloc[0]
    assert row['elasticity'] < 0
    # NOTE: the data is built to be roughly elastic (< -1), but only the sign
    # and the observation count are asserted here.
    assert row['n_obs'] == 3
|
|
|
|
|
|
def test_compute_elasticity_inelastic_product(pipeline_context):
    """Inelastic demand: a 20% price rise paired with a 2% demand drop.

    Expects a single result row for the product whose elasticity lies
    strictly between -1 and 0.
    """
    step = ComputeElasticityStep(pipeline_context)

    product = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
    base_price = 150
    base_demand = 40

    # Two consecutive 30-second windows shared by both series.
    windows = [
        ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
        ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
    ]
    demand_levels = [base_demand, base_demand * 0.98]  # tiny 2% decrease
    price_levels = [base_price, base_price * 1.20]  # 20% increase

    demand_chunks = [
        {
            'window_start': pd.Timestamp(start),
            'window_end': pd.Timestamp(end),
            'demand_vector': pd.DataFrame({
                'productId': [product],
                'demand_score': [level],
            }),
        }
        for (start, end), level in zip(windows, demand_levels)
    ]
    price_chunks = [
        {
            'window_start': pd.Timestamp(start),
            'window_end': pd.Timestamp(end),
            'price_vector': pd.DataFrame({
                'productId': [product],
                'price': [level],
            }),
        }
        for (start, end), level in zip(windows, price_levels)
    ]

    result = step.transform((demand_chunks, price_chunks))

    product_elast = result[result['productId'] == product]
    assert len(product_elast) == 1
    # Inelastic: elasticity between 0 and -1.
    elasticity = product_elast.iloc[0]['elasticity']
    assert -1 < elasticity < 0
|
|
|
|
|
|
def test_compute_elasticity_multiple_products(pipeline_context):
    """Elasticity is computed for several products in a single call.

    Builds five consecutive 30-second windows in which all three products
    have geometrically rising prices and geometrically falling demand, then
    expects one row per product, each with ``n_obs == 5`` and a negative
    elasticity.
    """
    step = ComputeElasticityStep(pipeline_context)

    products = [
        'd018efc1-25e9-4284-b276-80386e048b25',
        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
        '2cd7f756-fc65-4ba0-ab01-74521c1fff43',
    ]

    # Start timestamps of the 5 windows, each covering all 3 products.
    origin = pd.Timestamp('2023-01-01 10:00:00')
    window_starts = [origin + pd.Timedelta(f'{i*30}s') for i in range(5)]

    demand_chunks = [
        {
            'window_start': ts,
            'window_end': ts + pd.Timedelta('30s'),
            'demand_vector': pd.DataFrame({
                'productId': products,
                'demand_score': [
                    50 * (0.9 ** i),   # elastic: decreases as price rises
                    40 * (0.98 ** i),  # inelastic: barely changes
                    30 * (0.85 ** i),  # very elastic
                ],
            }),
        }
        for i, ts in enumerate(window_starts)
    ]
    price_chunks = [
        {
            'window_start': ts,
            'window_end': ts + pd.Timedelta('30s'),
            'price_vector': pd.DataFrame({
                'productId': products,
                'price': [
                    100 * (1.05 ** i),
                    150 * (1.10 ** i),
                    120 * (1.08 ** i),
                ],
            }),
        }
        for i, ts in enumerate(window_starts)
    ]

    result = step.transform((demand_chunks, price_chunks))

    assert isinstance(result, pd.DataFrame)
    # Every product should receive an elasticity estimate.
    assert len(result) == 3
    assert set(result['productId']) == set(products)
    assert all(result['n_obs'] == 5)
    # All three behave as normal goods.
    assert all(result['elasticity'] < 0)
|
|
|
|
|
|
def test_compute_elasticity_insufficient_data(pipeline_context):
    """A single aligned observation yields ``n_obs == 1`` and elasticity 0.0."""
    step = ComputeElasticityStep(pipeline_context)

    product = 'd018efc1-25e9-4284-b276-80386e048b25'
    window_start = pd.Timestamp('2023-01-01 10:00:00')
    window_end = pd.Timestamp('2023-01-01 10:00:30')

    # Exactly one observation for the product in each series.
    only_demand = {
        'window_start': window_start,
        'window_end': window_end,
        'demand_vector': pd.DataFrame({
            'productId': [product],
            'demand_score': [50],
        }),
    }
    only_price = {
        'window_start': window_start,
        'window_end': window_end,
        'price_vector': pd.DataFrame({
            'productId': [product],
            'price': [100],
        }),
    }

    result = step.transform(([only_demand], [only_price]))

    # A row is still expected, just with a low observation count.
    product_elast = result[result['productId'] == product]
    assert len(product_elast) == 1
    row = product_elast.iloc[0]
    assert row['n_obs'] == 1
    assert row['elasticity'] == 0.0  # not enough data
|
|
|
|
|
|
def test_compute_elasticity_misaligned_chunks(pipeline_context):
    """Demand and price windows that never overlap are handled gracefully.

    The demand window starts at 10:00 and the price window at 11:00, so no
    observations line up; the step must still return a DataFrame in which
    every row reports ``n_obs == 0``.
    """
    step = ComputeElasticityStep(pipeline_context)

    product = 'd018efc1-25e9-4284-b276-80386e048b25'

    demand_window = {
        'window_start': pd.Timestamp('2023-01-01 10:00:00'),
        'window_end': pd.Timestamp('2023-01-01 10:00:30'),
        'demand_vector': pd.DataFrame({
            'productId': [product],
            'demand_score': [50],
        }),
    }
    # Same product, but one hour later than the demand window.
    price_window = {
        'window_start': pd.Timestamp('2023-01-01 11:00:00'),
        'window_end': pd.Timestamp('2023-01-01 11:00:30'),
        'price_vector': pd.DataFrame({
            'productId': [product],
            'price': [100],
        }),
    }

    result = step.transform(([demand_window], [price_window]))

    # No aligned data: a DataFrame comes back with zero observations per row.
    assert isinstance(result, pd.DataFrame)
    assert all(result['n_obs'] == 0)
|
|
|
|
|
|
def test_elasticity_arc_method(pipeline_context):
    """Arc-method elasticity is negative when demand falls as price rises.

    Switches ``pipeline_context.config['elasticity_method']`` to ``'arc'``,
    feeds two consecutive 30-second windows in which price goes 100 -> 110
    and demand goes 100 -> 80, and expects exactly one result row for the
    product with a negative elasticity.

    The config entry is set back to ``'point'`` in a ``finally`` block so a
    failing assertion (or an exception raised by the step) cannot leave the
    ``'arc'`` setting behind in the context object.
    """
    product = 'd018efc1-25e9-4284-b276-80386e048b25'

    # Configure the context for the arc method before the step is built.
    pipeline_context.config['elasticity_method'] = 'arc'
    try:
        step = ComputeElasticityStep(pipeline_context)

        windows = [
            ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
            ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
        ]
        demand_levels = [100, 80]
        price_levels = [100, 110]

        demand_chunks = [
            {
                'window_start': pd.Timestamp(start),
                'window_end': pd.Timestamp(end),
                'demand_vector': pd.DataFrame({
                    'productId': [product],
                    'demand_score': [level],
                }),
            }
            for (start, end), level in zip(windows, demand_levels)
        ]
        price_chunks = [
            {
                'window_start': pd.Timestamp(start),
                'window_end': pd.Timestamp(end),
                'price_vector': pd.DataFrame({
                    'productId': [product],
                    'price': [level],
                }),
            }
            for (start, end), level in zip(windows, price_levels)
        ]

        result = step.transform((demand_chunks, price_chunks))

        product_elast = result[result['productId'] == product]
        assert len(product_elast) == 1
        assert product_elast.iloc[0]['elasticity'] < 0
    finally:
        # Reset config to the same end state the test always aimed for,
        # now guaranteed even when the test fails part-way through.
        pipeline_context.config['elasticity_method'] = 'point'
|