Airflow addition (#28)

* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non-refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
2026-07-16 01:53:37 +00:00 · 2025-11-29 17:50:16 +01:00
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions
--- a/experiments/procesing/tests/init.py
+++ b/experiments/procesing/tests/init.py
--- a/experiments/procesing/tests/conftest.py
+++ b/experiments/procesing/tests/conftest.py
@@ -0,0 +1,271 @@
+import pytest
+import pandas as pd
+from typing import List
+from procesing.providers.base import DataProvider
+from procesing.context import PipelineContext
+
+
+class MockProvider(DataProvider):
+    """In-memory stand-in for a data provider, backed by test fixtures.
+
+    Each fetch method returns a copy of the stored frame, so a test that
+    mutates a result does not alter the data held by the provider.
+    """
+
+    def __init__(self, products_df=None, experiments_df=None, kafka_data=None):
+        # Explicit ``is None`` checks: a DataFrame has no unambiguous truth value.
+        if products_df is None:
+            products_df = pd.DataFrame()
+        if experiments_df is None:
+            experiments_df = pd.DataFrame()
+        if kafka_data is None:
+            kafka_data = {}
+        self._products = products_df
+        self._experiments = experiments_df
+        self._kafka_data = kafka_data
+
+    def fetch_products(self, store_mode: str) -> pd.DataFrame:
+        """Return a copy of the product catalog; ``store_mode`` is not used."""
+        return self._products.copy()
+
+    def fetch_experiments(self, experiment_ids: List[str]) -> pd.DataFrame:
+        """Return copies of the experiment rows whose ``id`` is in ``experiment_ids``.
+
+        When no experiments are stored, a new column-less DataFrame is returned.
+        """
+        experiments = self._experiments
+        if experiments.empty:
+            return pd.DataFrame()
+        wanted = experiments['id'].isin(experiment_ids)
+        return experiments[wanted].copy()
+
+    def fetch_kafka_topic(self, topic: str) -> pd.DataFrame:
+        """Return a copy of the frame stored for ``topic`` (empty frame if unknown)."""
+        frame = self._kafka_data.get(topic, pd.DataFrame())
+        return frame.copy()
+
+
+@pytest.fixture
+def mock_products():
+    """Three-row product catalog with ``id``, ``name`` and ``base_price`` columns."""
+    catalog = [
+        ('d018efc1-25e9-4284-b276-80386e048b25', 'Junior Suite', 200.0),
+        ('51266ddb-5b07-47b7-89ee-5b5cae94bb11', 'Superior Room', 150.0),
+        ('2cd7f756-fc65-4ba0-ab01-74521c1fff43', 'Deluxe Room', 180.0),
+    ]
+    # Transpose the rows back into per-column lists before building the frame.
+    ids, names, base_prices = zip(*catalog)
+    return pd.DataFrame({
+        'id': list(ids),
+        'name': list(names),
+        'base_price': list(base_prices),
+    })
+
+
+@pytest.fixture
+def mock_interactions_raw_kafka():
+    """Five raw Kafka interaction messages from a single session, in offset order.
+
+    Every message nests its event under ``value -> payload``. The first three
+    events carry one experiment id, the last two carry another.
+    """
+    session_id = 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472'
+    first_experiment = '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35'
+    second_experiment = 'bbbbcccc-dddd-eeee-ffff-000011112222'
+    junior_suite = 'd018efc1-25e9-4284-b276-80386e048b25'
+    superior_room = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
+
+    def message(offset, kafka_ts, experiment_id, event_name, page, product_id, metadata, ts):
+        # Payload keys are kept in a fixed order for every message.
+        return {
+            'partitionID': 0, 'offset': offset, 'timestamp': kafka_ts,
+            'value': {
+                'payload': {
+                    'sessionId': session_id,
+                    'experimentId': experiment_id,
+                    'eventName': event_name,
+                    'page': page,
+                    'productId': product_id,
+                    'metadata': metadata,
+                    'storeMode': 'hotel',
+                    'ts': ts
+                }
+            }
+        }
+
+    return [
+        message(
+            203, 1764102082676, first_experiment, 'learn_more_about_item',
+            '/hotel/products/d018efc1-25e9-4284-b276-80386e048b25', junior_suite,
+            {'type': 'hotel', 'dateIndex': 1, 'roomType': 'Junior Suite'},
+            '2025-11-25T20:21:22.674Z',
+        ),
+        message(
+            204, 1764102086982, first_experiment, 'page_view',
+            '/hotel/products', None,
+            {'referrer': ''},
+            '2025-11-25T20:21:26.947Z',
+        ),
+        message(
+            205, 1764102091825, first_experiment, 'hover_over_title',
+            '/hotel/products', superior_room,
+            {'elementText': 'Superior Room', 'dateIndex': 1, 'dwellTime': 1200},
+            '2025-11-25T20:21:31.823Z',
+        ),
+        message(
+            206, 1764102094193, second_experiment, 'hover_over_paragraph',
+            '/hotel/products', superior_room,
+            {'elementText': 'price', 'dateIndex': 1, 'dwellTime': 1307},
+            '2025-11-25T20:21:34.191Z',
+        ),
+        message(
+            207, 1764102101970, second_experiment, 'hover_over_paragraph',
+            '/hotel/products', junior_suite,
+            {'elementText': 'price', 'dateIndex': 1, 'dwellTime': 1201},
+            '2025-11-25T20:21:41.967Z',
+        ),
+    ]
+
+
+@pytest.fixture
+def mock_interactions(mock_interactions_raw_kafka):
+    """Flatten the raw interaction messages into one DataFrame row per payload.
+
+    A parsed ``timestamp`` column is added alongside the original ``ts`` string.
+    """
+    payloads = [message['value']['payload'] for message in mock_interactions_raw_kafka]
+    interactions = pd.DataFrame(payloads)
+    return interactions.assign(timestamp=lambda frame: pd.to_datetime(frame['ts']))
+
+
+@pytest.fixture
+def mock_price_logs_raw_kafka():
+    """Five raw Kafka price-log messages for one session and one experiment.
+
+    Every message nests its record under ``value -> payload``; the records
+    alternate between two product ids and all use ``storeMode`` ``'shop'``.
+    """
+    session_id = 'd423ce8a-77aa-4c9a-94d4-d1adddcc3472'
+    experiment_id = '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35'
+    product_a = '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
+    product_b = '2ddabbfc-4127-48fc-86dc-ebc4c677efa2'
+
+    # (offset, kafka timestamp, product id, price, payload ts)
+    rows = [
+        (32, 1764104757969, product_a, 162.47, '2025-11-25T21:05:57.967Z'),
+        (33, 1764104757995, product_b, 743.49, '2025-11-25T21:05:57.993Z'),
+        (34, 1764104758011, product_a, 163.87, '2025-11-25T21:05:58.009Z'),
+        (35, 1764104758050, product_b, 397.46, '2025-11-25T21:05:58.049Z'),
+        (36, 1764104768865, product_a, 401.66, '2025-11-25T21:06:08.864Z'),
+    ]
+
+    messages = []
+    for offset, kafka_ts, product_id, price, ts in rows:
+        messages.append({
+            'partitionID': 0, 'offset': offset, 'timestamp': kafka_ts,
+            'value': {
+                'payload': {
+                    'productId': product_id,
+                    'price': price,
+                    'sessionId': session_id,
+                    'experimentId': experiment_id,
+                    'storeMode': 'shop',
+                    'ts': ts
+                }
+            }
+        })
+    return messages
+
+
+@pytest.fixture
+def mock_price_logs(mock_price_logs_raw_kafka):
+    """Flatten the raw price-log messages into one DataFrame row per payload.
+
+    A parsed ``timestamp`` column is added alongside the original ``ts`` string.
+    """
+    payloads = [message['value']['payload'] for message in mock_price_logs_raw_kafka]
+    price_logs = pd.DataFrame(payloads)
+    return price_logs.assign(timestamp=lambda frame: pd.to_datetime(frame['ts']))
+
+
+@pytest.fixture
+def mock_experiments():
+    """Two-row experiment metadata frame keyed by experiment ``id``."""
+    experiment_ids = [
+        '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35',
+        'bbbbcccc-dddd-eeee-ffff-000011112222',
+    ]
+    created_at = pd.to_datetime(['2025-11-25T20:00:00Z', '2025-11-26T10:00:00Z'])
+    columns = {
+        'id': experiment_ids,
+        'created_at': created_at,
+        'subject_name': ['Session A', 'Session B'],
+        'xp_human_only': [True, False],
+        'xp_market_mode': ['hotel', 'shop'],
+        'xp_task_id': [None, None],
+    }
+    return pd.DataFrame(columns)
+
+
+@pytest.fixture
+def mock_provider(mock_products, mock_experiments, mock_interactions, mock_price_logs):
+    """MockProvider loaded with the product, experiment and Kafka topic fixtures."""
+    topics = {
+        'user-interactions': mock_interactions,
+        'price-logs': mock_price_logs,
+    }
+    provider = MockProvider(
+        products_df=mock_products,
+        experiments_df=mock_experiments,
+        kafka_data=topics,
+    )
+    return provider
+
+
+@pytest.fixture
+def pipeline_context(mock_provider):
+    """PipelineContext over the mock provider: hotel mode, 30s windows, 3 price buckets."""
+    settings = {
+        'store_mode': 'hotel',
+        'window_size': '30s',
+        'n_price_buckets': 3,
+    }
+    return PipelineContext(provider=mock_provider, **settings)
+
+
+@pytest.fixture
+def empty_provider():
+    """MockProvider holding no rows: schema-only catalog frames and empty topics."""
+    product_columns = ['id', 'name', 'base_price']
+    experiment_columns = [
+        'id', 'created_at', 'subject_name',
+        'xp_human_only', 'xp_market_mode', 'xp_task_id',
+    ]
+    empty_topics = {
+        'user-interactions': pd.DataFrame(),
+        'price-logs': pd.DataFrame(),
+    }
+    return MockProvider(
+        products_df=pd.DataFrame(columns=product_columns),
+        experiments_df=pd.DataFrame(columns=experiment_columns),
+        kafka_data=empty_topics,
+    )
+
+
+@pytest.fixture
+def empty_context(empty_provider):
+    """PipelineContext over the empty provider (hotel mode, 30s windows)."""
+    context = PipelineContext(provider=empty_provider, store_mode='hotel', window_size='30s')
+    return context
--- a/experiments/procesing/tests/test_augement.py
+++ b/experiments/procesing/tests/test_augement.py
@@ -0,0 +1,45 @@
+import pytest
+import random
+import pandas as pd
+from procesing.steps import (
+    CreatePriceBucketsStep,
+    AugmentEventNamesStep
+)
+
+def test_bucketing(pipeline_context):
+    """CreatePriceBucketsStep adds a categorical ``price_bucket`` column.
+
+    Covers 100 distinct random prices (three non-empty, roughly balanced
+    buckets are expected) and an empty input frame.
+    """
+    bucketer = CreatePriceBucketsStep(context=pipeline_context)
+
+    # NOTE(review): the sample is drawn from the unseeded global RNG.
+    prices = random.sample(range(10, 1000), 100)
+    bucketed = bucketer.transform(pd.DataFrame({'metadata_price': prices}))
+    assert 'price_bucket' in bucketed.columns
+
+    buckets = bucketed['price_bucket']
+    assert isinstance(buckets.dtype, pd.CategoricalDtype)
+    # the context fixture configures three price buckets
+    assert buckets.nunique() == 3
+
+    # every bucket is populated and sizes differ by at most 10 rows
+    bucket_sizes = buckets.value_counts()
+    assert (bucket_sizes > 0).all()
+    assert bucket_sizes.max() - bucket_sizes.min() <= 10
+
+    # an empty frame still gains the column and stays empty
+    bucketed_empty = bucketer.transform(pd.DataFrame())
+    assert 'price_bucket' in bucketed_empty.columns
+    assert bucketed_empty.empty
+
+
+def test_augment_names(pipeline_context):
+    """AugmentEventNamesStep suffixes event names only when product and bucket are both set.
+
+    Rows missing either ``productId`` or ``price_bucket`` keep their plain name.
+    """
+    event_names = ['click', 'view', 'purchase']
+    product_ids = ['prod_1', 'prod_2', None]
+    price_buckets = ['PB_1', None, 'PB_3']
+    events = pd.DataFrame({
+        'eventName': event_names,
+        'productId': product_ids,
+        'price_bucket': price_buckets
+    })
+
+    augmented = AugmentEventNamesStep(context=pipeline_context).transform(events)
+
+    assert augmented['eventName'].tolist() == ['click_prod_1@PB_1', 'view', 'purchase']
--- a/experiments/procesing/tests/test_demand.py
+++ b/experiments/procesing/tests/test_demand.py
@@ -0,0 +1,49 @@
+import pytest
+import random
+import pandas as pd
+from procesing.steps import (
+    ComputeDemandStep
+)
+
+def test_compute_demand(pipeline_context):
+    """ComputeDemandStep scores every catalog product for evenly spread interactions.
+
+    The input holds 100 hourly interactions split 34/33/33 across the three
+    catalog products. The split is fixed; only the row order and the event
+    names come from a seeded generator, so the run is reproducible. With
+    independent uniform draws per row, a product could occasionally receive
+    too few interactions for the threshold asserted below (presumably
+    ``demand_score`` follows the per-product interaction count; confirm
+    against ComputeDemandStep).
+    """
+    step = ComputeDemandStep(context=pipeline_context)
+
+    rng = random.Random(0)  # local, seeded generator: no global RNG state involved
+    n_rows = 100
+    catalog_ids = [
+        'd018efc1-25e9-4284-b276-80386e048b25',
+        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
+        '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
+    ]
+    # Repeat the catalog and trim to n_rows (34/33/33), then randomise the order.
+    product_ids = (catalog_ids * (n_rows // len(catalog_ids) + 1))[:n_rows]
+    rng.shuffle(product_ids)
+
+    df = pd.DataFrame({
+        'ts': pd.date_range(start='2023-01-01', periods=n_rows, freq='h'),
+        'productId': product_ids,
+        'eventName': rng.choices(['view', 'click', 'purchase'], k=n_rows)
+    })
+    result = step.transform(df)
+    assert isinstance(result, pd.DataFrame)
+    assert not result.empty
+    assert set(result['productId']) == set(pipeline_context.products['id'])
+    # each product should score above roughly a third of the rows, minus a margin of 10
+    assert all(result['demand_score'] > 100/3 - 10)
+
+
+def test_compute_demand_skewed(pipeline_context):
+    """ComputeDemandStep ranks products in the order of their interaction share.
+
+    The input holds 100 hourly interactions split 70/20/10 across the three
+    catalog products. The split is fixed; only the row order and the event
+    names come from a seeded generator, so the run is reproducible. With
+    independent weighted draws per row, the two less frequent products could
+    occasionally swap places and break the strict ordering asserted below.
+    """
+    step = ComputeDemandStep(context=pipeline_context)
+
+    rng = random.Random(0)  # local, seeded generator: no global RNG state involved
+    most_seen = 'd018efc1-25e9-4284-b276-80386e048b25'
+    middle = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
+    least_seen = '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
+
+    # Skewed interaction data: exact 70/20/10 shares in random row order.
+    product_ids = [most_seen] * 70 + [middle] * 20 + [least_seen] * 10
+    rng.shuffle(product_ids)
+
+    df = pd.DataFrame({
+        'ts': pd.date_range(start='2023-01-01', periods=100, freq='h'),
+        'productId': product_ids,
+        'eventName': rng.choices(['view', 'click', 'purchase'], k=100)
+    })
+    result = step.transform(df)
+    assert isinstance(result, pd.DataFrame)
+    assert not result.empty
+    assert set(result['productId']) == set(pipeline_context.products['id'])
+    # demand scores must follow the skew of the input
+    scores = result.set_index('productId')['demand_score'].to_dict()
+    assert scores[most_seen] > scores[middle] > scores[least_seen]
--- a/experiments/procesing/tests/test_elasticity.py
+++ b/experiments/procesing/tests/test_elasticity.py
@@ -0,0 +1,353 @@
+import pytest
+import pandas as pd
+import numpy as np
+from procesing.steps import (
+    AggregatePriceLogsStep,
+    ComputeElasticityStep
+)
+
+
+def test_aggregate_price_logs_basic(pipeline_context):
+    """AggregatePriceLogsStep returns a non-empty list of well-formed window chunks."""
+    aggregator = AggregatePriceLogsStep(pipeline_context)
+
+    # 100 price points, one every 10 seconds, cycling through the three products
+    catalog_ids = [
+        'd018efc1-25e9-4284-b276-80386e048b25',
+        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
+        '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
+    ]
+    timestamps = pd.date_range(start='2023-01-01 10:00:00', periods=100, freq='10s')
+    product_ids = np.tile(catalog_ids, 34)[:100]
+    prices = np.random.uniform(100, 200, 100)
+    price_logs = pd.DataFrame({
+        'ts': timestamps,
+        'productId': product_ids,
+        'price': prices
+    })
+
+    chunks = aggregator.transform(price_logs)
+    assert isinstance(chunks, list)
+    assert len(chunks) > 0
+
+    # every chunk carries its window bounds plus a productId/price frame
+    for chunk in chunks:
+        for key in ('window_start', 'window_end', 'price_vector'):
+            assert key in chunk
+        price_vector = chunk['price_vector']
+        assert isinstance(price_vector, pd.DataFrame)
+        for column in ('productId', 'price'):
+            assert column in price_vector.columns
+
+
+def test_aggregate_price_logs_handles_gaps(pipeline_context):
+    """Sparse price logs with a ~2 minute gap still yield at least two windows.
+
+    Only the number of returned chunks is asserted here, not their contents.
+    """
+    aggregator = AggregatePriceLogsStep(pipeline_context)
+
+    first_product = 'd018efc1-25e9-4284-b276-80386e048b25'
+    second_product = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
+    timestamps = pd.to_datetime([
+        '2023-01-01 10:00:00',
+        '2023-01-01 10:00:05',
+        '2023-01-01 10:02:00',  # roughly two minutes after the previous point
+        '2023-01-01 10:02:30'
+    ])
+    sparse_logs = pd.DataFrame({
+        'ts': timestamps,
+        'productId': [first_product, first_product, second_product, second_product],
+        'price': [100, 102, 150, 153]
+    })
+
+    chunks = aggregator.transform(sparse_logs)
+    assert isinstance(chunks, list)
+    # the gap must not collapse the output into a single window
+    assert len(chunks) >= 2
+
+
+def test_compute_elasticity_with_known_relationship(pipeline_context):
+    """Elasticity is negative when demand falls while price rises over three windows.
+
+    Prices go 100 -> 110 -> 120 while demand goes 50 -> 42.5 -> 35.
+    """
+    step = ComputeElasticityStep(pipeline_context)
+
+    product_id = 'd018efc1-25e9-4284-b276-80386e048b25'
+    base_price = 100
+    base_demand = 50
+
+    # three consecutive 30-second windows
+    windows = [
+        ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
+        ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
+        ('2023-01-01 10:01:00', '2023-01-01 10:01:30'),
+    ]
+    # demand drops by 15% and then 30% of the base level
+    demand_levels = [base_demand, base_demand * 0.85, base_demand * 0.70]
+    # price rises by 10% and then 20% of the base level
+    price_levels = [base_price, base_price * 1.10, base_price * 1.20]
+
+    demand_chunks = [
+        {
+            'window_start': pd.Timestamp(start),
+            'window_end': pd.Timestamp(end),
+            'demand_vector': pd.DataFrame({
+                'productId': [product_id],
+                'demand_score': [demand]
+            })
+        }
+        for (start, end), demand in zip(windows, demand_levels)
+    ]
+    price_chunks = [
+        {
+            'window_start': pd.Timestamp(start),
+            'window_end': pd.Timestamp(end),
+            'price_vector': pd.DataFrame({
+                'productId': [product_id],
+                'price': [price]
+            })
+        }
+        for (start, end), price in zip(windows, price_levels)
+    ]
+
+    result = step.transform((demand_chunks, price_chunks))
+    assert isinstance(result, pd.DataFrame)
+    assert not result.empty
+    for column in ('productId', 'elasticity', 'n_obs'):
+        assert column in result.columns
+
+    matches = result[result['productId'] == product_id]
+    assert len(matches) == 1
+    row = matches.iloc[0]
+    # only the sign is checked; the magnitude (elastic, i.e. < -1) is not asserted
+    assert row['elasticity'] < 0
+    # all three windows count as observations
+    assert row['n_obs'] == 3
+
+
+def test_compute_elasticity_inelastic_product(pipeline_context):
+    """A 20% price rise with a 2% demand drop gives an elasticity strictly between -1 and 0."""
+    step = ComputeElasticityStep(pipeline_context)
+
+    product_id = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
+    base_price = 150
+    base_demand = 40
+
+    # two consecutive 30-second windows
+    windows = [
+        ('2023-01-01 10:00:00', '2023-01-01 10:00:30'),
+        ('2023-01-01 10:00:30', '2023-01-01 10:01:00'),
+    ]
+    demand_levels = [base_demand, base_demand * 0.98]  # demand barely moves
+    price_levels = [base_price, base_price * 1.20]  # price jumps by 20%
+
+    demand_chunks = [
+        {
+            'window_start': pd.Timestamp(start),
+            'window_end': pd.Timestamp(end),
+            'demand_vector': pd.DataFrame({
+                'productId': [product_id],
+                'demand_score': [demand]
+            })
+        }
+        for (start, end), demand in zip(windows, demand_levels)
+    ]
+    price_chunks = [
+        {
+            'window_start': pd.Timestamp(start),
+            'window_end': pd.Timestamp(end),
+            'price_vector': pd.DataFrame({
+                'productId': [product_id],
+                'price': [price]
+            })
+        }
+        for (start, end), price in zip(windows, price_levels)
+    ]
+
+    result = step.transform((demand_chunks, price_chunks))
+    matches = result[result['productId'] == product_id]
+    assert len(matches) == 1
+    # inelastic demand: negative, but smaller than 1 in magnitude
+    assert -1 < matches.iloc[0]['elasticity'] < 0
+
+
+def test_compute_elasticity_multiple_products(pipeline_context):
+    """Three products over five aligned windows each get a negative elasticity with n_obs == 5."""
+    step = ComputeElasticityStep(pipeline_context)
+
+    products = [
+        'd018efc1-25e9-4284-b276-80386e048b25',
+        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
+        '2cd7f756-fc65-4ba0-ab01-74521c1fff43'
+    ]
+    first_window = pd.Timestamp('2023-01-01 10:00:00')
+    window_length = pd.Timedelta('30s')
+
+    def demand_scores(i):
+        # demand decays geometrically per window, at a different rate per product
+        return [50 * (0.9 ** i), 40 * (0.98 ** i), 30 * (0.85 ** i)]
+
+    def prices(i):
+        # prices grow geometrically per window, at a different rate per product
+        return [100 * (1.05 ** i), 150 * (1.10 ** i), 120 * (1.08 ** i)]
+
+    demand_chunks = []
+    price_chunks = []
+    for i in range(5):
+        window_start = first_window + pd.Timedelta(f'{i*30}s')
+        window_end = window_start + window_length
+        demand_chunks.append({
+            'window_start': window_start,
+            'window_end': window_end,
+            'demand_vector': pd.DataFrame({
+                'productId': products,
+                'demand_score': demand_scores(i)
+            })
+        })
+        price_chunks.append({
+            'window_start': window_start,
+            'window_end': window_end,
+            'price_vector': pd.DataFrame({
+                'productId': products,
+                'price': prices(i)
+            })
+        })
+
+    result = step.transform((demand_chunks, price_chunks))
+    assert isinstance(result, pd.DataFrame)
+    # one elasticity row per product
+    assert len(result) == 3
+    assert set(result['productId']) == set(products)
+    assert (result['n_obs'] == 5).all()
+    # demand falls as price rises for every product
+    assert (result['elasticity'] < 0).all()
+
+
+def test_compute_elasticity_insufficient_data(pipeline_context):
+    """A single aligned window yields one row with n_obs == 1 and elasticity 0.0."""
+    step = ComputeElasticityStep(pipeline_context)
+
+    product_id = 'd018efc1-25e9-4284-b276-80386e048b25'
+    window_start = pd.Timestamp('2023-01-01 10:00:00')
+    window_end = pd.Timestamp('2023-01-01 10:00:30')
+
+    # one observation only, on each side
+    demand_vector = pd.DataFrame({
+        'productId': [product_id],
+        'demand_score': [50]
+    })
+    price_vector = pd.DataFrame({
+        'productId': [product_id],
+        'price': [100]
+    })
+    demand_chunks = [{
+        'window_start': window_start,
+        'window_end': window_end,
+        'demand_vector': demand_vector
+    }]
+    price_chunks = [{
+        'window_start': window_start,
+        'window_end': window_end,
+        'price_vector': price_vector
+    }]
+
+    result = step.transform((demand_chunks, price_chunks))
+    # a row is still produced, flagged by its low observation count
+    matches = result[result['productId'] == product_id]
+    assert len(matches) == 1
+    row = matches.iloc[0]
+    assert row['n_obs'] == 1
+    assert row['elasticity'] == 0.0  # too few points to estimate anything
+
+
+def test_compute_elasticity_misaligned_chunks(pipeline_context):
+    """Demand and price windows an hour apart produce a frame with no observations."""
+    step = ComputeElasticityStep(pipeline_context)
+
+    product_id = 'd018efc1-25e9-4284-b276-80386e048b25'
+
+    demand_chunk = {
+        'window_start': pd.Timestamp('2023-01-01 10:00:00'),
+        'window_end': pd.Timestamp('2023-01-01 10:00:30'),
+        'demand_vector': pd.DataFrame({
+            'productId': [product_id],
+            'demand_score': [50]
+        })
+    }
+    # the price window starts an hour after the demand window
+    price_chunk = {
+        'window_start': pd.Timestamp('2023-01-01 11:00:00'),
+        'window_end': pd.Timestamp('2023-01-01 11:00:30'),
+        'price_vector': pd.DataFrame({
+            'productId': [product_id],
+            'price': [100]
+        })
+    }
+
+    result = step.transform(([demand_chunk], [price_chunk]))
+    # no overlap must not raise; any returned row reports zero observations
+    assert isinstance(result, pd.DataFrame)
+    assert (result['n_obs'] == 0).all()
+
+
+def test_elasticity_arc_method(pipeline_context):
+    """With ``elasticity_method`` set to ``'arc'``, a price rise with falling demand is negative.
+
+    The config override is undone in a ``finally`` block, so the context is
+    restored even when the step raises or an assertion fails.
+    """
+    product_id = 'd018efc1-25e9-4284-b276-80386e048b25'
+
+    demand_chunks = [
+        {
+            'window_start': pd.Timestamp('2023-01-01 10:00:00'),
+            'window_end': pd.Timestamp('2023-01-01 10:00:30'),
+            'demand_vector': pd.DataFrame({
+                'productId': [product_id],
+                'demand_score': [100]
+            })
+        },
+        {
+            'window_start': pd.Timestamp('2023-01-01 10:00:30'),
+            'window_end': pd.Timestamp('2023-01-01 10:01:00'),
+            'demand_vector': pd.DataFrame({
+                'productId': [product_id],
+                'demand_score': [80]
+            })
+        }
+    ]
+
+    price_chunks = [
+        {
+            'window_start': pd.Timestamp('2023-01-01 10:00:00'),
+            'window_end': pd.Timestamp('2023-01-01 10:00:30'),
+            'price_vector': pd.DataFrame({
+                'productId': [product_id],
+                'price': [100]
+            })
+        },
+        {
+            'window_start': pd.Timestamp('2023-01-01 10:00:30'),
+            'window_end': pd.Timestamp('2023-01-01 10:01:00'),
+            'price_vector': pd.DataFrame({
+                'productId': [product_id],
+                'price': [110]
+            })
+        }
+    ]
+
+    # switch the context to the arc method before the step is constructed
+    pipeline_context.config['elasticity_method'] = 'arc'
+    try:
+        step = ComputeElasticityStep(pipeline_context)
+        result = step.transform((demand_chunks, price_chunks))
+        product_elast = result[result['productId'] == product_id]
+        assert len(product_elast) == 1
+        assert product_elast.iloc[0]['elasticity'] < 0
+    finally:
+        # reset config, as before, but now also on failure
+        pipeline_context.config['elasticity_method'] = 'point'
--- a/experiments/procesing/tests/test_fetch.py
+++ b/experiments/procesing/tests/test_fetch.py
@@ -0,0 +1,51 @@
+import pytest
+import pandas as pd
+from procesing.steps import (
+    FetchInteractionsStep,
+    FetchPriceLogsStep,
+    FetchExperimentsStep,
+)
+
+
+def test_fetch_interactions_data(pipeline_context):
+    """FetchInteractionsStep returns a DataFrame exposing the expected columns."""
+    interactions = FetchInteractionsStep(pipeline_context).transform(None)
+    assert interactions is not None
+    assert isinstance(interactions, pd.DataFrame)
+
+    required_columns = (
+        "eventName",
+        "dateIndex",
+        "experimentId",
+        "storeMode",
+        "metadata_elementText",
+    )
+    for column in required_columns:
+        assert column in interactions.columns
+
+def test_fetch_price_logs(pipeline_context):
+    """FetchPriceLogsStep returns price/productId columns with prices in [0, 9999]."""
+    price_logs = FetchPriceLogsStep(pipeline_context).transform(None)
+    assert price_logs is not None
+    assert isinstance(price_logs, pd.DataFrame)
+
+    for column in ("price", "productId"):
+        assert column in price_logs.columns
+
+    # sanity bounds on the fetched prices
+    observed = price_logs['price'].to_list()
+    lowest, highest = min(observed), max(observed)
+    assert lowest >= 0
+    assert highest <= 9999
+
+
+def test_experiments_fetching(pipeline_context):
+    """FetchExperimentsStep resolves both experiments referenced by the interactions."""
+    interactions = FetchInteractionsStep(pipeline_context).transform(None)
+    assert interactions is not None
+
+    fetch_experiments = FetchExperimentsStep(pipeline_context)
+    experiments = fetch_experiments.transform(interactions)
+    assert experiments is not None
+    assert isinstance(experiments, pd.DataFrame)
+    assert not experiments.empty
+    assert 'id' in experiments.columns
+    # two rows expected, one of them the experiment id below
+    assert len(experiments) == 2
+    assert '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35' in experiments['id'].values
--- a/experiments/procesing/tests/test_pricing.py
+++ b/experiments/procesing/tests/test_pricing.py
@@ -0,0 +1,87 @@
+import pytest
+import pandas as pd
+
+from procesing.pricers import (
+    StaticPricer,
+    RandomPricer,
+    ElasticityBasedPricer
+)
+
+
+def test_static_pricer_fit_and_predict():
+    """A fitted StaticPricer predicts exactly the base prices it was fitted on."""
+    history = pd.DataFrame({
+        'product_id': [1, 2, 3],
+        'base_price': [100.0, 150.0, 200.0]
+    })
+
+    pricer = StaticPricer()
+    pricer.fit(history)
+
+    # no state is passed to predict
+    predicted_prices = pricer.predict(None)
+
+    expected_prices = history['base_price'].values
+    prices_match = all(predicted_prices == expected_prices)
+    assert prices_match, "Predicted prices do not match base prices"
+
+
+def test_random_pricer_fit_and_predict():
+    """A seeded RandomPricer predicts one varied, in-bounds price per product."""
+    history = pd.DataFrame({
+        'product_id': [1, 2, 3],
+        'base_price': [100.0, 150.0, 200.0]
+    })
+    lower_bound, upper_bound = 50.0, 250.0
+
+    pricer = RandomPricer(price_min=lower_bound, price_max=upper_bound, seed=42)
+    pricer.fit(history)
+
+    # no state is passed to predict
+    predicted_prices = pricer.predict(None)
+
+    # every prediction stays inside the configured range
+    assert predicted_prices.min() >= 50.0, "Predicted prices are below minimum bound"
+    assert predicted_prices.max() <= 250.0, "Predicted prices are above maximum bound"
+    # loose variety check: at least two distinct prices
+    distinct_prices = set(predicted_prices)
+    assert len(distinct_prices) > 1, "Predicted prices are not varied enough"
+    assert len(predicted_prices) == len(history), "Number of predicted prices does not match number of products"
+
+def test_elasticity_based_pricer_fit_and_predict():
+    """ElasticityBasedPricer raises prices when demand exceeds the fitted mean.
+
+    Checks the bounds and count of the predictions, then compares each price
+    with ``base_price * (1 + 0.1 * |elasticity| * relative demand excess)``.
+    """
+    history = pd.DataFrame({
+        'productId': [1, 2, 3],
+        'elasticity': [-1.5, -0.5, -2.0],
+        'base_price': [100.0, 150.0, 200.0],
+        'mean_demand': [10, 20, 15]
+    })
+
+    pricer = ElasticityBasedPricer(alpha=0.1, price_floor=50.0, price_ceil=300.0)
+    pricer.fit(history)
+
+    # minimal stand-in for the state object: only ``demand`` is provided
+    class MockStateSpace:
+        def __init__(self, demand):
+            self.demand = demand
+
+    # demand sits above the mean for every product
+    state_space = MockStateSpace(demand=[15, 25, 20])
+
+    predicted_prices = pricer.predict(state_space)
+
+    # predictions respect the floor and ceiling, one per product
+    assert predicted_prices.min() >= 50.0, "Predicted prices are below minimum bound"
+    assert predicted_prices.max() <= 300.0, "Predicted prices are above maximum bound"
+    assert len(predicted_prices) == len(history), "Number of predicted prices does not match number of products"
+
+    # semantic check: above-mean demand should push each price to the expected value
+    for i, row in history.iterrows():
+        base_price = row['base_price']
+        elasticity = row['elasticity']
+        relative_excess = (state_space.demand[i] - row['mean_demand']) / row['mean_demand']
+        expected_increase = base_price * (1 + 0.1 * abs(elasticity) * relative_excess)
+        assert predicted_prices[i] >= base_price, f"Predicted price for product {row['productId']} did not increase as expected"
+        assert abs(predicted_prices[i] - expected_increase) < 1e-5, f"Predicted price for product {row['productId']} does not match expected calculation within 1e-5 tolerance"