updating computation power graph

This commit is contained in:
2026-03-08 14:22:54 +01:00
parent 17c128cbc0
commit 28dbcacd95
6 changed files with 142 additions and 114 deletions

130
Makefile
View File

@@ -54,160 +54,84 @@ $(BUILDDIR):
mkdir -p paper/$(BUILDDIR)
# pdf.build — build the paper PDF.
# NOTE(review): this view is a rendered diff without +/- markers, so two
# definitions of the target appear back to back; presumably the first is the
# removed version and the second the added one — confirm against the Makefile.
# First definition: requires $(BUILDDIR), runs paper/concat_code.sh, then runs
# $(LATEXMK) -pdf from inside $(SRCDIR), using ../.latexmkrc and writing its
# output to ../$(BUILDDIR). -f and -interaction=nonstopmode are passed so the
# run does not stop to prompt on errors (assuming $(LATEXMK) is latexmk).
.PHONY: pdf.build
pdf.build: $(BUILDDIR)
@bash paper/concat_code.sh
@cd $(SRCDIR) && \
$(LATEXMK) -pdf -jobname=$(JOBNAME) -f \
-interaction=nonstopmode -file-line-error \
-r ../.latexmkrc \
-outdir=../$(BUILDDIR) $(TEX)
# Second definition: no prerequisites; delegates to `$(NX) run paper:build`.
pdf.build:
@$(NX) run paper:build
# pdf.watch — rebuild the paper PDF in watch mode.
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: requires $(BUILDDIR) and runs $(LATEXMK) with -pvc -pdf
# from inside $(SRCDIR), output to ../$(BUILDDIR). (-pvc is latexmk's
# "preview continuously" flag, assuming $(LATEXMK) is latexmk.)
.PHONY: pdf.watch
pdf.watch: $(BUILDDIR)
@cd $(SRCDIR) && \
$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \
-interaction=nonstopmode -file-line-error \
-r ../.latexmkrc \
-outdir=../$(BUILDDIR) $(TEX)
# Second definition: no prerequisites; delegates to `$(NX) run paper:watch`.
pdf.watch:
@$(NX) run paper:watch
# pdf.clean — remove the paper's build output.
# NOTE(review): only one rule header is shown; the recipe lines below mix what
# are presumably removed lines ($(LATEXMK) -C, rm -rf) with the added
# `$(NX) run paper:clean` line — this is a diff view without +/- markers.
.PHONY: pdf.clean
pdf.clean:
# `|| true` means a failing cd or $(LATEXMK) -C never fails the target.
@cd $(SRCDIR) && \
$(LATEXMK) -C -jobname=$(JOBNAME) -outdir=../$(BUILDDIR) || true
# NOTE(review): if $(BUILDDIR) expands to nothing this becomes
# `rm -rf paper//*`; there is no guard for that in the lines visible here.
rm -rf paper/$(BUILDDIR)/*
# Delegates cleaning to the Nx target paper:clean.
@$(NX) run paper:clean
# test.backend — run the backend test suite.
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: requires $(VENV) and runs $(PYTEST) -v (command is echoed).
.PHONY: test.backend
test.backend: $(VENV)
$(PYTEST) -v
# Second definition: no $(VENV) prerequisite; delegates to `$(NX) run research:test`.
test.backend:
@$(NX) run research:test
# test.e2e — run the end-to-end tests under tests/e2e.
# NOTE(review): only one rule header is shown; the recipe mixes what are
# presumably removed lines (npm/playwright/curl steps) with the added
# `$(NX) run e2e:test` line — this is a diff view without +/- markers.
.PHONY: test.e2e
test.e2e:
# Install JS dependencies and the Chromium browser for Playwright.
@cd tests/e2e && npm install
@cd tests/e2e && npx playwright install chromium
# Create tests/e2e/.env from .env.example only when .env does not exist yet.
@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
# Poll each service once per second for up to 30 seconds; abort with a message
# if localhost:5000/health, localhost:3000 or localhost:8085/health never answers.
@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
@cd tests/e2e && npm test
# Delegates to the Nx target e2e:test.
@$(NX) run e2e:test
# test.all — run backend and end-to-end tests.
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: no recipe, just the prerequisites test.backend and test.e2e.
.PHONY: test.all
test.all: test.backend test.e2e
# Second definition: runs the `test` target of the Nx projects research and
# e2e via run-many, with --parallel=1.
test.all:
@$(NX) run-many -t test --projects=research,e2e --parallel=1
# web.dev — start the web app's dev script.
# NOTE(review): only one rule header is shown; the two recipe lines are
# presumably the removed (npm) and the added ($(NX)) variant — this is a diff
# view without +/- markers.
.PHONY: web.dev
web.dev:
# Installs dependencies in web/ and runs `npm run dev` there.
@cd web && npm install && npm run dev
# Delegates to the Nx target web:dev.
@$(NX) run web:dev
# $(VENV) — create the Python virtual environment at the path held in $(VENV)
# and upgrade pip inside it via $(PIP).
# NOTE(review): no `$(NX)` counterpart follows this rule in the diff view, so
# it is presumably a removed rule — confirm against the real Makefile.
$(VENV):
python3 -m venv $(VENV)
$(PIP) install --upgrade pip
# install — install the Python dependencies.
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: requires $(VENV), then installs requirements.txt with $(PIP).
.PHONY: install
install: $(VENV)
$(PIP) install -r requirements.txt
# Second definition: no prerequisites; delegates to `$(NX) run research:install`.
install:
@$(NX) run research:install
# train — run a local training job (python module engine.train).
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: requires `install`. Each recipe line first expands
# $(SWEEP_ENV_LOAD) (defined outside this view), then either checks that the
# shell variable WANDB_API_KEY is non-empty, or runs
# `$(PYTHON) -m engine.train $(LOCAL_TRAIN_ARGS)` with the WANDB_* variables
# set in the command's environment. `$$` passes a literal `$` to the shell.
.PHONY: train
train: install
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
$(PYTHON) -m engine.train $(LOCAL_TRAIN_ARGS)
# Second definition: no prerequisites; passes WANDB_ENTITY, WANDB_PROJECT,
# SWEEP_ENV_FILE and LOCAL_TRAIN_ARGS as environment variables to
# `$(NX) run research:train`. The WANDB_API_KEY check is not repeated here.
train:
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train
# train.agent — run engine.train as a sweep agent for $(SWEEP_ID).
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: requires `install`; fails early when WANDB_API_KEY (shell,
# after $(SWEEP_ENV_LOAD)) or SWEEP_ID (make variable) is empty, then runs
# `$(PYTHON) -m engine.train --sweep-agent --sweep-id ...`. The $(if ...)
# appends `--count $(AGENT_COUNT)` only when AGENT_COUNT is non-empty and not 0.
.PHONY: train.agent
train.agent: install
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
$(PYTHON) -m engine.train --sweep-agent --sweep-id "$(SWEEP_ID)" \
$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
# Second definition: no prerequisites and no checks in the Makefile; passes
# WANDB_ENTITY, WANDB_PROJECT, SWEEP_ENV_FILE, SWEEP_ID and AGENT_COUNT as
# environment variables to `$(NX) run research:train-agent`.
train.agent:
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-agent
# train.bootstrap — run scripts/wandb_agent_bootstrap.sh with the sweep/agent
# settings supplied through environment variables.
# NOTE(review): only one rule header is shown; the recipe mixes what are
# presumably removed lines (checks + bash script) with the added
# `$(NX) run research:train-bootstrap` line — diff view without +/- markers.
.PHONY: train.bootstrap
train.bootstrap:
# Preconditions: WANDB_API_KEY and GITHUB_TOKEN must be set in the shell after
# $(SWEEP_ENV_LOAD); REPO_URL and SWEEP_ID must be non-empty make variables.
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
@$(SWEEP_ENV_LOAD); test -n "$$GITHUB_TOKEN" || (echo "GITHUB_TOKEN required — set it in $(SWEEP_ENV_FILE)" && exit 1)
@test -n "$(REPO_URL)" || (echo "REPO_URL required, e.g. REPO_URL=https://github.com/org/repo.git" && exit 1)
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
# Secrets come from the shell ($$VAR); everything else from make variables.
@$(SWEEP_ENV_LOAD); \
WANDB_API_KEY="$$WANDB_API_KEY" \
WANDB_ENTITY="$(WANDB_ENTITY)" \
WANDB_PROJECT="$(WANDB_PROJECT)" \
GITHUB_TOKEN="$$GITHUB_TOKEN" \
REPO_URL="$(REPO_URL)" \
BRANCH="$(BRANCH)" \
WORKDIR="$(WORKDIR)" \
SWEEP_ID="$(SWEEP_ID)" \
AGENT_COUNT="$(AGENT_COUNT)" \
AGENT_LOOP="$(AGENT_LOOP)" \
RETRY_SECONDS="$(RETRY_SECONDS)" \
bash scripts/wandb_agent_bootstrap.sh
# Nx variant: forwards the same make variables plus SWEEP_ENV_FILE; it does not
# pass WANDB_API_KEY or GITHUB_TOKEN itself (presumably the Nx target reads
# them from SWEEP_ENV_FILE — verify).
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap
# stats.lines — print a line count for the repository's source files.
# NOTE(review): only one rule header is shown; the find pipeline is presumably
# the removed recipe and the `$(NX)` line the added one — diff view without
# +/- markers.
.PHONY: stats.lines
stats.lines:
# Concatenates every *.ts and *.py file below the current directory, pruning
# node_modules, .venv and venv directories, and counts the total lines.
@find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \
\( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l
# Delegates to the Nx target research:stats.
@$(NX) run research:stats
# wordcount — count words in the paper's main chapters.
# NOTE(review): only one rule header is shown; the echo/texcount lines are
# presumably the removed recipe and the `$(NX)` line the added one — diff view
# without +/- markers.
.PHONY: wordcount
wordcount:
@echo "Counting words in main text (excluding appendix)..."
# Runs texcount over the six chapter files 01-intro through 06-conclusion in
# $(SRCDIR)/chapters; no appendix file is listed.
@texcount -nosub -total -sum -1 \
$(SRCDIR)/chapters/01-intro.tex \
$(SRCDIR)/chapters/02-literature-review.tex \
$(SRCDIR)/chapters/03-methodology.tex \
$(SRCDIR)/chapters/04-results.tex \
$(SRCDIR)/chapters/05-discussion.tex \
$(SRCDIR)/chapters/06-conclusion.tex
# Delegates to the Nx target paper:wordcount.
@$(NX) run paper:wordcount
# docker.train.publish — build and push the trainer images.
# NOTE(review): only one rule header is shown; the docker lines are presumably
# the removed recipe and the `$(NX)` line the added one — diff view without
# +/- markers.
.PHONY: docker.train.publish
docker.train.publish:
# Builds the `gpu` and `tpu` stages of docker/Trainer.dockerfile and pushes
# them as $(TRAIN_IMAGE_REF):gpu-latest and $(TRAIN_IMAGE_REF):tpu-latest.
docker build -f docker/Trainer.dockerfile --target gpu -t $(TRAIN_IMAGE_REF):gpu-latest .
docker push $(TRAIN_IMAGE_REF):gpu-latest
docker build -f docker/Trainer.dockerfile --target tpu -t $(TRAIN_IMAGE_REF):tpu-latest .
docker push $(TRAIN_IMAGE_REF):tpu-latest
# Delegates to the Nx target research:docker-train-publish, passing
# TRAIN_IMAGE_REF through the environment.
@TRAIN_IMAGE_REF="$(TRAIN_IMAGE_REF)" $(NX) run research:docker-train-publish
# train.tpu.pod — copy scripts/tpu_pod_run.sh to every worker of the TPU VM
# $(TPU_NAME) and execute it there over ssh.
# NOTE(review): only one rule header is shown; the gcloud lines are presumably
# the removed recipe and the `$(NX)` line the added one — diff view without
# +/- markers.
.PHONY: train.tpu.pod
train.tpu.pod:
# Preconditions: TPU_NAME and SWEEP_ID non-empty; WANDB_API_KEY set in the
# shell after $(SWEEP_ENV_LOAD).
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
# Upload the script to /tmp on all workers.
gcloud compute tpus tpu-vm scp scripts/tpu_pod_run.sh $(TPU_NAME):/tmp/tpu_pod_run.sh \
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
# Run it on all workers; WANDB_API_KEY, SWEEP_ID and AGENT_COUNT are inlined
# into the remote command string inside single quotes.
@$(SWEEP_ENV_LOAD); \
gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
--command="WANDB_API_KEY='$$WANDB_API_KEY' SWEEP_ID='$(SWEEP_ID)' AGENT_COUNT='$(AGENT_COUNT)' sh /tmp/tpu_pod_run.sh"
# Nx variant: forwards the TPU_*, SWEEP_* and AGENT_COUNT variables to
# research:train-tpu-pod.
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-pod
# train.tpu.vm.prepare — get the TPU VM $(TPU_NAME) ready for a training run.
# NOTE(review): only one rule header is shown; the script/gcloud lines are
# presumably the removed recipe and the `$(NX)` line the added one — diff view
# without +/- markers.
.PHONY: train.tpu.vm.prepare
train.tpu.vm.prepare:
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
# Runs scripts/tpu_sync_repo.sh with $(CURDIR) as LOCAL_REPO_DIR and
# $(TPU_REPO_DIR) as REMOTE_REPO_DIR (what the script does with them is not
# visible here).
TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" \
LOCAL_REPO_DIR="$(CURDIR)" REMOTE_REPO_DIR="$(TPU_REPO_DIR)" \
sh scripts/tpu_sync_repo.sh
# Uploads scripts/tpu_vm_train.sh to /tmp on all workers.
gcloud compute tpus tpu-vm scp scripts/tpu_vm_train.sh $(TPU_NAME):/tmp/tpu_vm_train.sh \
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
# Nx variant: forwards TPU_NAME, TPU_ZONE, TPU_PROJECT and TPU_REPO_DIR to
# research:train-tpu-vm-prepare.
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" $(NX) run research:train-tpu-vm-prepare
# train.tpu.vm.run — execute /tmp/tpu_vm_train.sh on every worker of the TPU
# VM $(TPU_NAME) over ssh.
# NOTE(review): only one rule header is shown; the gcloud lines are presumably
# the removed recipe and the `$(NX)` line the added one — diff view without
# +/- markers.
.PHONY: train.tpu.vm.run
train.tpu.vm.run:
# Preconditions: TPU_NAME and LOCAL_TRAIN_ARGS must be non-empty. Unlike the
# other TPU targets there is no WANDB_API_KEY check in this recipe.
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
@test -n "$(LOCAL_TRAIN_ARGS)" || (echo "LOCAL_TRAIN_ARGS required, e.g. --algo ppo --jax --total-timesteps 200000" && exit 1)
# REPO_DIR, TRAIN_ARGS and WANDB_API_KEY are inlined into the remote command
# inside single quotes, so a value containing a single quote would break it.
@$(SWEEP_ENV_LOAD); \
gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
--command="REPO_DIR='$(TPU_REPO_DIR)' TRAIN_ARGS='$(LOCAL_TRAIN_ARGS)' WANDB_API_KEY='$$WANDB_API_KEY' sh /tmp/tpu_vm_train.sh"
# Nx variant: forwards the TPU_* variables, SWEEP_ENV_FILE and
# LOCAL_TRAIN_ARGS to research:train-tpu-vm-run.
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm-run
# train.tpu.vm — prepare the TPU VM and then run training on it.
# NOTE(review): rendered diff without +/- markers; presumably the first
# definition is the removed one and the second the added one — confirm.
# First definition: no recipe, just the prerequisites train.tpu.vm.prepare and
# train.tpu.vm.run.
.PHONY: train.tpu.vm
train.tpu.vm: train.tpu.vm.prepare train.tpu.vm.run
# Second definition: no prerequisites; forwards the TPU_* variables,
# SWEEP_ENV_FILE and LOCAL_TRAIN_ARGS to `$(NX) run research:train-tpu-vm`.
train.tpu.vm:
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm
# train.tpu.vm.sweep — run scripts/tpu_vm_sweep_agent.py for sweep $(SWEEP_ID)
# against the TPU VM $(TPU_NAME).
# NOTE(review): only one rule header is shown; the python3 lines are presumably
# the removed recipe and the `$(NX)` line the added one — diff view without
# +/- markers.
.PHONY: train.tpu.vm.sweep
train.tpu.vm.sweep:
# Preconditions: TPU_NAME and SWEEP_ID non-empty; WANDB_API_KEY set in the
# shell after $(SWEEP_ENV_LOAD).
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=lusiana/phantom-pricing/abc123" && exit 1)
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
# The script runs with the system python3 (not $(PYTHON)); the trailing $(if ...)
# appends `--count $(AGENT_COUNT)` only when AGENT_COUNT is non-empty and not 0.
@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" \
python3 scripts/tpu_vm_sweep_agent.py \
--sweep-id "$(SWEEP_ID)" \
--tpu-name "$(TPU_NAME)" \
--tpu-zone "$(TPU_ZONE)" \
--tpu-project "$(TPU_PROJECT)" \
--tpu-repo-dir "$(TPU_REPO_DIR)" \
$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
# Nx variant: forwards the TPU_* variables, SWEEP_ENV_FILE, SWEEP_ID and
# AGENT_COUNT to research:train-tpu-vm-sweep.
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-vm-sweep
.PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs
backend.server: