Airflow addition (#28)

* introducing airflow to run pipeline

* chore: updating dag with upload to registry

* introducing complete provider (non refactored and noisy)

* chore: removing old shit

* generic pricing baselines

* feature: super simple model registry (to be updated maybe third party OS software)

* chore: refactoring the providers docker config and requirements

* chore: refactored and broke down components (breaking)

* exporting all

* local pipeline execution working

* fix: fixing import structures from nonrelativistic

* chore: enables cross comm pickling with fully e2e pipeline compilation

* docs: what the pipeline is like now

* pipelines local running and pipeline high level definition

* cleaning old pipeline and vectorization

* leaked but fixing, not so important

* test: started with pipeline step testing

* chore: cleaning up provider of prices

* test: extra tests with semantic meaning checks

* migrating pricers

* feature: introducing pricing predictors (pricers)

* chore: e2e is done with new pipeline

* extra session feature extraction

* feature: experimental session pricer and metrics (vibe)

* chore: redefined and connected pricers (#29)
This commit is contained in:
Daniel Alves Rösel
2025-11-29 17:50:16 +01:00
committed by GitHub
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions

View File

@@ -71,6 +71,153 @@ services:
- "${REDPANDA_CONSOLE_PORT:-8080}:8080"
restart: unless-stopped
# PostgreSQL instance. The Airflow services in this file point their
# AIRFLOW__DATABASE__SQL_ALCHEMY_CONN at it (user, password and database are
# all "airflow").
postgres:
  container_name: "PHANTOM-postgres"
  image: postgres:13
  environment:
    - POSTGRES_USER=airflow
    - POSTGRES_PASSWORD=airflow
    - POSTGRES_DB=airflow
  ports:
    # Host port 5433 -> container port 5432.
    - "5433:5432"
  volumes:
    # Named volume so the data directory survives container re-creation.
    - postgres_data:/var/lib/postgresql/data
  # Readiness probe: reports healthy only once the server accepts connections
  # for the "airflow" user/database, so dependents may gate on
  # `condition: service_healthy` instead of mere container start.
  healthcheck:
    test: ["CMD", "pg_isready", "-U", "airflow", "-d", "airflow"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 5s
  restart: unless-stopped
# One-shot Airflow bootstrap container: started with the `version` command and
# never restarted (restart: "no").
airflow-init:
  build:
    context: .
    dockerfile: docker/Airflow.dockerfile
  container_name: "PHANTOM-airflow-init"
  command: version
  restart: "no"
  depends_on:
    - postgres
  environment:
    # --- Airflow core / metadata database ---
    - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
    - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
    # Interpolated from the host environment / .env file.
    - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
    - AIRFLOW__CORE__LOAD_EXAMPLES=false
    - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
    # --- Bootstrap switches ---
    # NOTE(review): presumably read by the image entrypoint to migrate the
    # metadata DB and create the admin web user; confirm against
    # docker/Airflow.dockerfile.
    - _AIRFLOW_DB_MIGRATE=true
    - _AIRFLOW_WWW_USER_CREATE=true
    - _AIRFLOW_WWW_USER_USERNAME=admin
    - _AIRFLOW_WWW_USER_PASSWORD=admin
    # --- Redis endpoint ---
    - REDIS_HOST=redis
    - REDIS_PORT=6379
  volumes:
    # All bind mounts are read-write here (no `:ro` suffix).
    - ./experiments/airflow/dags:/opt/airflow/dags
    - ./experiments/airflow/logs:/opt/airflow/logs
    - ./experiments/airflow/plugins:/opt/airflow/plugins
    - ./experiments/procesing:/opt/airflow/procesing
    - ./lib:/opt/airflow/lib
# Airflow web UI, published on host port ${AIRFLOW_WEBSERVER_PORT:-8085}.
airflow-webserver:
  container_name: "PHANTOM-airflow-webserver"
  build:
    context: .
    dockerfile: docker/Airflow.dockerfile
  depends_on:
    postgres:
      condition: service_started
    # Wait for the one-shot init container to exit successfully rather than
    # merely start, so the webserver does not race the bootstrap job.
    airflow-init:
      condition: service_completed_successfully
    redis:
      condition: service_started
  environment:
    # --- Airflow core / metadata database ---
    - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
    - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
    # Interpolated from the host environment / .env file.
    - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
    - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
    - AIRFLOW__CORE__LOAD_EXAMPLES=false
    # NOTE(review): pickled XComs can execute code when deserialized; confirm
    # that every DAG producing XComs is trusted.
    - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
    # NOTE(review): makes the Airflow configuration viewable in the web UI;
    # confirm this is intended outside local development.
    - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
    # --- Endpoints of sibling services ---
    - KAFKA_HOST=kafka
    - KAFKA_PORT=29092
    - BACKEND_URL=http://backend:5000
    - NEXT_PUBLIC_SUPABASE_URL=${NEXT_PUBLIC_SUPABASE_URL}
    - NEXT_PUBLIC_SUPABASE_ANON_KEY=${NEXT_PUBLIC_SUPABASE_ANON_KEY}
    - REDIS_HOST=redis
    - REDIS_PORT=6379
  ports:
    - "${AIRFLOW_WEBSERVER_PORT:-8085}:8080"
  volumes:
    # Code mounts are read-only; only the logs directory is writable.
    - ./experiments/airflow/dags:/opt/airflow/dags:ro
    - ./experiments/airflow/logs:/opt/airflow/logs
    - ./experiments/airflow/plugins:/opt/airflow/plugins:ro
    - ./experiments/procesing:/opt/airflow/procesing:ro
    - ./lib:/opt/airflow/lib:ro
  command: webserver
  restart: unless-stopped
  # The scheduler service gates on this check (condition: service_healthy).
  healthcheck:
    test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 30s
# Airflow scheduler. Starts only after the webserver reports healthy and the
# redis container has started.
airflow-scheduler:
  build:
    context: .
    dockerfile: docker/Airflow.dockerfile
  container_name: "PHANTOM-airflow-scheduler"
  command: scheduler
  restart: unless-stopped
  depends_on:
    airflow-webserver:
      condition: service_healthy
    redis:
      condition: service_started
  environment:
    # --- Airflow core / metadata database ---
    - AIRFLOW__CORE__EXECUTOR=SequentialExecutor
    - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
    # Interpolated from the host environment / .env file.
    - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
    - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
    - AIRFLOW__CORE__LOAD_EXAMPLES=false
    - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
    # --- Endpoints of sibling services ---
    - KAFKA_HOST=kafka
    - KAFKA_PORT=29092
    - BACKEND_URL=http://backend:5000
    - NEXT_PUBLIC_SUPABASE_URL=${NEXT_PUBLIC_SUPABASE_URL}
    - NEXT_PUBLIC_SUPABASE_ANON_KEY=${NEXT_PUBLIC_SUPABASE_ANON_KEY}
    - REDIS_HOST=redis
    - REDIS_PORT=6379
  volumes:
    # Code mounts are read-only; only the logs directory is writable.
    - ./experiments/airflow/dags:/opt/airflow/dags:ro
    - ./experiments/airflow/logs:/opt/airflow/logs
    - ./experiments/airflow/plugins:/opt/airflow/plugins:ro
    - ./experiments/procesing:/opt/airflow/procesing:ro
    - ./lib:/opt/airflow/lib:ro
  healthcheck:
    # `$$` escapes Compose interpolation so the shell inside the container
    # receives a literal ${HOSTNAME}.
    test:
      - CMD-SHELL
      - 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"'
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 30s
# Pricing provider: runs the `provider.app:app` ASGI application under uvicorn
# on container port 5001.
pricing-provider:
  build:
    context: .
    dockerfile: docker/Provider.dockerfile
  container_name: "PHANTOM-pricing-provider"
  command: python -m uvicorn provider.app:app --host 0.0.0.0 --port 5001
  restart: unless-stopped
  depends_on:
    - redis
    - kafka
  environment:
    - PROVIDER_PORT=5001
    # --- Endpoints of sibling services ---
    - REDIS_HOST=redis
    - REDIS_PORT=6379
    - KAFKA_HOST=kafka
    - KAFKA_PORT=29092
    # Interpolated from the host environment / .env file.
    - NEXT_PUBLIC_SUPABASE_URL=${NEXT_PUBLIC_SUPABASE_URL}
    - NEXT_PUBLIC_SUPABASE_ANON_KEY=${NEXT_PUBLIC_SUPABASE_ANON_KEY}
  ports:
    # Host port is overridable via PROVIDER_PORT; the container side is fixed
    # at 5001 to match the uvicorn --port argument.
    - "${PROVIDER_PORT:-5001}:5001"
  volumes:
    # Source trees are bind-mounted read-only.
    - ./lib:/app/lib:ro
    - ./experiments/procesing:/app/procesing:ro
    - ./backend/provider:/app/provider:ro
# Named volumes declared for this project; each has an empty definition, so
# Docker's defaults apply.
volumes:
  phantom_kafka_data: {}
  phantom_redis_data: {}
  postgres_data: {}