mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non-refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated, maybe third-party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
52 lines
1.5 KiB
Python
52 lines
1.5 KiB
Python
import pytest
|
|
import pandas as pd
|
|
from procesing.steps import (
|
|
FetchInteractionsStep,
|
|
FetchPriceLogsStep,
|
|
FetchExperimentsStep,
|
|
)
|
|
|
|
|
|
def test_fetch_interactions_data(pipeline_context):
    """Check that FetchInteractionsStep returns a DataFrame with the expected columns.

    The step is run with ``None`` as its input and the result must contain
    every column listed in ``required_columns``.
    """
    frame = FetchInteractionsStep(pipeline_context).transform(None)

    assert frame is not None
    assert isinstance(frame, pd.DataFrame)

    required_columns = (
        "eventName",
        "dateIndex",
        "experimentId",
        "storeMode",
        "metadata_elementText",
    )
    # Collect every absent column so a failure reports all of them at once.
    missing = [name for name in required_columns if name not in frame.columns]
    assert not missing
|
|
|
|
def test_fetch_price_logs(pipeline_context):
    """Check that FetchPriceLogsStep returns a DataFrame of bounded prices.

    The result must contain the ``price`` and ``productId`` columns, and all
    values in ``price`` must lie within the closed range [0, 9999].
    """
    frame = FetchPriceLogsStep(pipeline_context).transform(None)

    assert frame is not None
    assert isinstance(frame, pd.DataFrame)

    required_columns = ("price", "productId")
    missing = [name for name in required_columns if name not in frame.columns]
    assert not missing

    # Bounds are checked on the plain Python list, lower bound first.
    price_values = frame['price'].to_list()
    assert min(price_values) >= 0
    assert max(price_values) <= 9999
|
|
|
|
|
|
def test_experiments_fetching(pipeline_context):
    """Check that FetchExperimentsStep resolves experiments from interactions.

    Interactions are fetched first and passed to the experiments step. The
    result must be a non-empty DataFrame with an ``id`` column, exactly two
    rows, and a row for the known experiment id asserted below.
    """
    interaction_frame = FetchInteractionsStep(pipeline_context).transform(None)
    assert interaction_frame is not None

    experiment_frame = FetchExperimentsStep(pipeline_context).transform(interaction_frame)

    assert experiment_frame is not None
    assert isinstance(experiment_frame, pd.DataFrame)
    assert not experiment_frame.empty
    assert 'id' in experiment_frame.columns
    assert len(experiment_frame) == 2

    known_ids = experiment_frame['id'].values
    assert '53aefd07-f66a-4d7f-ba8b-7ea1fc562d35' in known_ids
|