mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
Airflow addition (#28)
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
This commit is contained in:
committed by
GitHub
parent
2a0e44ab24
commit
ad9423bf59
49
experiments/procesing/tests/test_demand.py
Normal file
49
experiments/procesing/tests/test_demand.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pytest
|
||||
import random
|
||||
import pandas as pd
|
||||
from procesing.steps import (
|
||||
ComputeDemandStep
|
||||
)
|
||||
|
||||
def test_compute_demand(pipeline_context):
    """Check ComputeDemandStep on evenly spread interaction data.

    Builds 100 hourly interactions split as evenly as possible across three
    product IDs and asserts that the step returns a non-empty DataFrame that
    covers every product in ``pipeline_context.products['id']`` with a
    ``demand_score`` above ``100/3 - 10``.
    """
    step = ComputeDemandStep(context=pipeline_context)

    n_rows = 100
    product_ids = [
        'd018efc1-25e9-4284-b276-80386e048b25',
        '51266ddb-5b07-47b7-89ee-5b5cae94bb11',
        '2cd7f756-fc65-4ba0-ab01-74521c1fff43',
    ]
    # The product column used to be drawn with unseeded random.choices, so the
    # per-product row counts changed on every run and the threshold assertion
    # below could fail by sampling luck alone. Round-robin assignment gives a
    # fixed 34/33/33 split that is also interleaved over time.
    balanced_products = [product_ids[i % len(product_ids)] for i in range(n_rows)]
    # Event names stay random but come from a seeded generator so that every
    # run sees exactly the same frame.
    rng = random.Random(0)

    # Test with normal (balanced) interaction data
    df = pd.DataFrame({
        'ts': pd.date_range(start='2023-01-01', periods=n_rows, freq='h'),
        'productId': balanced_products,
        'eventName': rng.choices(['view', 'click', 'purchase'], k=n_rows),
    })
    result = step.transform(df)

    # isinstance rather than an exact type comparison: a DataFrame subclass
    # is still a valid result.
    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    assert set(result['productId']) == set(pipeline_context.products['id'])
    assert (result['demand_score'] > n_rows / 3 - 10).all()
|
||||
|
||||
|
||||
def test_compute_demand_skewed(pipeline_context):
    """Check that ComputeDemandStep ranks products by skewed interaction volume.

    Builds 100 hourly interactions split 70/20/10 across three product IDs and
    asserts that the step returns a non-empty DataFrame covering every product
    in ``pipeline_context.products['id']``, with ``demand_score`` strictly
    decreasing from the most to the least frequent product.
    """
    step = ComputeDemandStep(context=pipeline_context)

    popular = 'd018efc1-25e9-4284-b276-80386e048b25'
    medium = '51266ddb-5b07-47b7-89ee-5b5cae94bb11'
    rare = '2cd7f756-fc65-4ba0-ab01-74521c1fff43'

    # Seeded generator: every run builds exactly the same frame.
    rng = random.Random(0)
    # The product column used to be sampled with unseeded
    # random.choices(weights=[0.7, 0.2, 0.1]); with only 100 draws the realised
    # counts could occasionally invert the intended order and fail the strict
    # ranking assertion. An exact 70/20/10 split keeps the same skew without
    # that sampling noise; shuffling spreads each product over the time range.
    skewed_products = [popular] * 70 + [medium] * 20 + [rare] * 10
    rng.shuffle(skewed_products)
    n_rows = len(skewed_products)

    # Test with skewed interaction data
    df = pd.DataFrame({
        'ts': pd.date_range(start='2023-01-01', periods=n_rows, freq='h'),
        'productId': skewed_products,
        'eventName': rng.choices(['view', 'click', 'purchase'], k=n_rows),
    })
    result = step.transform(df)

    # isinstance rather than an exact type comparison: a DataFrame subclass
    # is still a valid result.
    assert isinstance(result, pd.DataFrame)
    assert not result.empty
    assert set(result['productId']) == set(pipeline_context.products['id'])

    # test for skewness: more interactions must yield a strictly higher score
    scores = result.set_index('productId')['demand_score'].to_dict()
    assert scores[popular] > scores[medium] > scores[rare]
|
||||
Reference in New Issue
Block a user