mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
Airflow addition (#28)
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
This commit is contained in:
committed by
GitHub
parent
2a0e44ab24
commit
ad9423bf59
45
experiments/procesing/tests/test_augement.py
Normal file
45
experiments/procesing/tests/test_augement.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import pytest
|
||||
import random
|
||||
import pandas as pd
|
||||
from procesing.steps import (
|
||||
CreatePriceBucketsStep,
|
||||
AugmentEventNamesStep
|
||||
)
|
||||
|
||||
def test_bucketing(pipeline_context):
    """Check the ``price_bucket`` column produced by ``CreatePriceBucketsStep``.

    Two scenarios are exercised against the same step instance:

    1. 100 distinct prices drawn from ``range(10, 1000)``: the result must
       contain a categorical ``price_bucket`` column with exactly three
       non-empty buckets whose sizes differ by at most 10 rows.
    2. An empty DataFrame: the result must still expose ``price_bucket``
       and must itself be empty.

    Args:
        pipeline_context: pytest fixture passed to the step as ``context``;
            the expected bucket count (3) is assumed to come from it.
    """
    step = CreatePriceBucketsStep(context=pipeline_context)

    # Use a dedicated, seeded generator so the sampled prices are identical on
    # every run; a failure can then be reproduced instead of depending on the
    # global `random` state.
    rng = random.Random(42)

    # Test with normal price data
    df = pd.DataFrame({
        'metadata_price': rng.sample(range(10, 1000), 100),
    })
    result = step.transform(df)
    assert 'price_bucket' in result.columns
    # The bucket column must be categorical, not plain strings/numbers.
    assert isinstance(result['price_bucket'].dtype, pd.CategoricalDtype)
    assert result['price_bucket'].nunique() == 3  # as per context config

    # Distribution check: every bucket is populated and sizes are close.
    counts = result['price_bucket'].value_counts()
    assert (counts > 0).all()
    assert counts.max() - counts.min() <= 10  # roughly equal distribution for 100 samples

    # Test with empty DataFrame
    df = pd.DataFrame()
    result = step.transform(df)
    assert 'price_bucket' in result.columns
    assert result.empty
|
||||
|
||||
|
||||
def test_augment_names(pipeline_context):
    """Check how ``AugmentEventNamesStep`` rewrites the ``eventName`` column.

    The fixture frame has three rows: one with both a product id and a price
    bucket, one missing the price bucket, and one missing the product id.
    The test expects only the fully populated row to be rewritten (to
    ``'click_prod_1@PB_1'``); the other two event names must be unchanged.

    Args:
        pipeline_context: pytest fixture passed to the step as ``context``.
    """
    augmenter = AugmentEventNamesStep(context=pipeline_context)

    events = pd.DataFrame({
        'eventName': ['click', 'view', 'purchase'],
        'productId': ['prod_1', 'prod_2', None],
        'price_bucket': ['PB_1', None, 'PB_3']
    })
    # Expected names, row by row, after augmentation.
    expected_event_names = ['click_prod_1@PB_1', 'view', 'purchase']

    augmented = augmenter.transform(events)
    actual_event_names = augmented['eventName'].tolist()

    assert actual_event_names == expected_event_names
|
||||
Reference in New Issue
Block a user