diff --git a/Makefile b/Makefile index fe5baca..3b39c82 100644 --- a/Makefile +++ b/Makefile @@ -54,160 +54,84 @@ $(BUILDDIR): mkdir -p paper/$(BUILDDIR) .PHONY: pdf.build -pdf.build: $(BUILDDIR) - @bash paper/concat_code.sh - @cd $(SRCDIR) && \ - $(LATEXMK) -pdf -jobname=$(JOBNAME) -f \ - -interaction=nonstopmode -file-line-error \ - -r ../.latexmkrc \ - -outdir=../$(BUILDDIR) $(TEX) +pdf.build: + @$(NX) run paper:build .PHONY: pdf.watch -pdf.watch: $(BUILDDIR) - @cd $(SRCDIR) && \ - $(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \ - -interaction=nonstopmode -file-line-error \ - -r ../.latexmkrc \ - -outdir=../$(BUILDDIR) $(TEX) +pdf.watch: + @$(NX) run paper:watch .PHONY: pdf.clean pdf.clean: - @cd $(SRCDIR) && \ - $(LATEXMK) -C -jobname=$(JOBNAME) -outdir=../$(BUILDDIR) || true - rm -rf paper/$(BUILDDIR)/* + @$(NX) run paper:clean .PHONY: test.backend -test.backend: $(VENV) - $(PYTEST) -v +test.backend: + @$(NX) run research:test .PHONY: test.e2e test.e2e: - @cd tests/e2e && npm install - @cd tests/e2e && npx playwright install chromium - @test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env - @timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1) - @timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1) - @timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1) - @cd tests/e2e && npm test + @$(NX) run e2e:test .PHONY: test.all -test.all: test.backend test.e2e +test.all: + @$(NX) run-many -t test --projects=research,e2e --parallel=1 .PHONY: web.dev web.dev: - @cd web && npm install && npm run dev + @$(NX) run web:dev $(VENV): python3 -m venv $(VENV) $(PIP) install --upgrade pip .PHONY: install -install: $(VENV) - $(PIP) install -r requirements.txt +install: + @$(NX) run research:install .PHONY: train -train: install - @$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1) - @$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \ - $(PYTHON) -m engine.train $(LOCAL_TRAIN_ARGS) +train: + @WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train .PHONY: train.agent -train.agent: install - @$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1) - @test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1) - @$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \ - $(PYTHON) -m engine.train --sweep-agent --sweep-id "$(SWEEP_ID)" \ - $(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),) +train.agent: + @WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-agent .PHONY: train.bootstrap train.bootstrap: - @$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1) - @$(SWEEP_ENV_LOAD); test -n "$$GITHUB_TOKEN" || (echo "GITHUB_TOKEN required — set it in $(SWEEP_ENV_FILE)" && exit 1) - @test -n "$(REPO_URL)" || (echo "REPO_URL required, e.g. REPO_URL=https://github.com/org/repo.git" && exit 1) - @test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1) - @$(SWEEP_ENV_LOAD); \ - WANDB_API_KEY="$$WANDB_API_KEY" \ - WANDB_ENTITY="$(WANDB_ENTITY)" \ - WANDB_PROJECT="$(WANDB_PROJECT)" \ - GITHUB_TOKEN="$$GITHUB_TOKEN" \ - REPO_URL="$(REPO_URL)" \ - BRANCH="$(BRANCH)" \ - WORKDIR="$(WORKDIR)" \ - SWEEP_ID="$(SWEEP_ID)" \ - AGENT_COUNT="$(AGENT_COUNT)" \ - AGENT_LOOP="$(AGENT_LOOP)" \ - RETRY_SECONDS="$(RETRY_SECONDS)" \ - bash scripts/wandb_agent_bootstrap.sh + @WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap .PHONY: stats.lines stats.lines: - @find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \ - \( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l + @$(NX) run research:stats .PHONY: wordcount wordcount: - @echo "Counting words in main text (excluding appendix)..." - @texcount -nosub -total -sum -1 \ - $(SRCDIR)/chapters/01-intro.tex \ - $(SRCDIR)/chapters/02-literature-review.tex \ - $(SRCDIR)/chapters/03-methodology.tex \ - $(SRCDIR)/chapters/04-results.tex \ - $(SRCDIR)/chapters/05-discussion.tex \ - $(SRCDIR)/chapters/06-conclusion.tex + @$(NX) run paper:wordcount .PHONY: docker.train.publish docker.train.publish: - docker build -f docker/Trainer.dockerfile --target gpu -t $(TRAIN_IMAGE_REF):gpu-latest . - docker push $(TRAIN_IMAGE_REF):gpu-latest - docker build -f docker/Trainer.dockerfile --target tpu -t $(TRAIN_IMAGE_REF):tpu-latest . - docker push $(TRAIN_IMAGE_REF):tpu-latest + @TRAIN_IMAGE_REF="$(TRAIN_IMAGE_REF)" $(NX) run research:docker-train-publish .PHONY: train.tpu.pod train.tpu.pod: - @test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1) - @test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1) - @$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1) - gcloud compute tpus tpu-vm scp scripts/tpu_pod_run.sh $(TPU_NAME):/tmp/tpu_pod_run.sh \ - --zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all - @$(SWEEP_ENV_LOAD); \ - gcloud compute tpus tpu-vm ssh $(TPU_NAME) \ - --zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \ - --command="WANDB_API_KEY='$$WANDB_API_KEY' SWEEP_ID='$(SWEEP_ID)' AGENT_COUNT='$(AGENT_COUNT)' sh /tmp/tpu_pod_run.sh" + @TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-pod .PHONY: train.tpu.vm.prepare train.tpu.vm.prepare: - @test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1) - TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" \ - LOCAL_REPO_DIR="$(CURDIR)" REMOTE_REPO_DIR="$(TPU_REPO_DIR)" \ - sh scripts/tpu_sync_repo.sh - gcloud compute tpus tpu-vm scp scripts/tpu_vm_train.sh $(TPU_NAME):/tmp/tpu_vm_train.sh \ - --zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all + @TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" $(NX) run research:train-tpu-vm-prepare .PHONY: train.tpu.vm.run train.tpu.vm.run: - @test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1) - @test -n "$(LOCAL_TRAIN_ARGS)" || (echo "LOCAL_TRAIN_ARGS required, e.g. --algo ppo --jax --total-timesteps 200000" && exit 1) - @$(SWEEP_ENV_LOAD); \ - gcloud compute tpus tpu-vm ssh $(TPU_NAME) \ - --zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \ - --command="REPO_DIR='$(TPU_REPO_DIR)' TRAIN_ARGS='$(LOCAL_TRAIN_ARGS)' WANDB_API_KEY='$$WANDB_API_KEY' sh /tmp/tpu_vm_train.sh" + @TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm-run .PHONY: train.tpu.vm -train.tpu.vm: train.tpu.vm.prepare train.tpu.vm.run +train.tpu.vm: + @TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm .PHONY: train.tpu.vm.sweep train.tpu.vm.sweep: - @test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1) - @test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=lusiana/phantom-pricing/abc123" && exit 1) - @$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1) - @$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" \ - python3 scripts/tpu_vm_sweep_agent.py \ - --sweep-id "$(SWEEP_ID)" \ - --tpu-name "$(TPU_NAME)" \ - --tpu-zone "$(TPU_ZONE)" \ - --tpu-project "$(TPU_PROJECT)" \ - --tpu-repo-dir "$(TPU_REPO_DIR)" \ - $(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),) + @TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-vm-sweep .PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs backend.server: diff --git a/engine/project.json b/engine/project.json index b325196..10272c3 100644 --- a/engine/project.json +++ b/engine/project.json @@ -7,7 +7,7 @@ "install": { "executor": "nx:run-commands", "options": { - "command": "make install", + "command": "bash scripts/nx_research.sh install", "cwd": "." } }, @@ -17,7 +17,7 @@ "install" ], "options": { - "command": "make test.backend", + "command": ".venv/bin/pytest -v", "cwd": "." } }, @@ -27,14 +27,76 @@ "install" ], "options": { - "command": "make train", + "command": "bash scripts/nx_research.sh train", + "cwd": "." + } + }, + "train-agent": { + "executor": "nx:run-commands", + "dependsOn": [ + "install" + ], + "options": { + "command": "bash scripts/nx_research.sh train-agent", + "cwd": "." + } + }, + "train-bootstrap": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_research.sh train-bootstrap", "cwd": "." } }, "stats": { "executor": "nx:run-commands", "options": { - "command": "make stats.lines", + "command": "bash scripts/nx_research.sh stats", + "cwd": "." + } + }, + "docker-train-publish": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_research.sh docker-train-publish", + "cwd": "." + } + }, + "train-tpu-pod": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_research.sh train-tpu-pod", + "cwd": "." + } + }, + "train-tpu-vm-prepare": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_research.sh train-tpu-vm-prepare", + "cwd": "." + } + }, + "train-tpu-vm-run": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_research.sh train-tpu-vm-run", + "cwd": "." + } + }, + "train-tpu-vm": { + "executor": "nx:run-commands", + "dependsOn": [ + "train-tpu-vm-prepare" + ], + "options": { + "command": "bash scripts/nx_research.sh train-tpu-vm-run", + "cwd": "." + } + }, + "train-tpu-vm-sweep": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_research.sh train-tpu-vm-sweep", "cwd": "." } } diff --git a/paper/project.json b/paper/project.json index 6158bab..ed47680 100644 --- a/paper/project.json +++ b/paper/project.json @@ -10,21 +10,28 @@ "{projectRoot}/build" ], "options": { - "command": "make pdf.build", + "command": "bash scripts/nx_paper.sh build", "cwd": "." } }, "watch": { "executor": "nx:run-commands", "options": { - "command": "make pdf.watch", + "command": "bash scripts/nx_paper.sh watch", "cwd": "." } }, "clean": { "executor": "nx:run-commands", "options": { - "command": "make pdf.clean", + "command": "bash scripts/nx_paper.sh clean", + "cwd": "." + } + }, + "wordcount": { + "executor": "nx:run-commands", + "options": { + "command": "bash scripts/nx_paper.sh wordcount", "cwd": "." } } diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex index 4e770b8..fcbc5c0 100644 --- a/paper/src/chapters/03-methodology.tex +++ b/paper/src/chapters/03-methodology.tex @@ -210,8 +210,7 @@ The simulator has multiple configurable factors. We design a multi-factor study % Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n=18 per group. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions. While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable. -% TODO: cite in the apendix the math to get to 160 petaflops of compute -Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160 PFLOPS of aggregate compute, which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted. +Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted. \begin{table}[ht] \centering diff --git a/paper/src/main.tex b/paper/src/main.tex index 45400ff..7a17506 100644 --- a/paper/src/main.tex +++ b/paper/src/main.tex @@ -53,6 +53,31 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce \item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time. % TODO: maybe define other things in a similar succient manner \end{description} + +\section{Aggregate Compute Budget Derivation} +\label{app:compute_budget} + +The claimed peak throughput of approximately 160\,PFLOPS follows from multiplying the per-chip BF16 peak (from official Google Cloud TPU documentation) by the number of chips in each allocation tier and summing across generations. + +\begin{table}[ht] +\centering +\caption{Per-generation contribution to aggregate BF16 throughput.} +\label{tab:compute_derivation} +\begin{tabular}{@{}lrrr@{}} +\toprule +\textbf{TPU Gen.} & \textbf{Chips} & \textbf{Peak BF16/chip (TFLOPS)} & \textbf{Subtotal (TFLOPS)} \\ +\midrule +v6e (Trillium) & 128 & 918 & $128 \times 918 = 117{,}504$ \\ +v5e & 128 & 197 & $128 \times 197 = 25{,}216$ \\ +v4 & 64 & 275 & $64 \times 275 = 17{,}600$ \\ +\midrule +\textbf{Total} & \textbf{320} & & $\mathbf{160{,}320}$ \\ +\bottomrule +\end{tabular} +\end{table} + +Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions. + % \input{../build/concatenated_code} \end{document} diff --git a/tests/e2e/project.json b/tests/e2e/project.json index bd7c950..86edfb3 100644 --- a/tests/e2e/project.json +++ b/tests/e2e/project.json @@ -13,12 +13,23 @@ }, "test": { "executor": "nx:run-commands", + "dependsOn": [ + "install" + ], "outputs": [ "{projectRoot}/test-results" ], "options": { - "command": "make test.e2e", - "cwd": "." + "commands": [ + "npx playwright install chromium", + "test -f .env || cp .env.example .env", + "timeout 30 bash -c \"until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Backend not ready' && exit 1)", + "timeout 30 bash -c \"until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done\" || (echo 'Web app not ready' && exit 1)", + "timeout 30 bash -c \"until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Airflow not ready' && exit 1)", + "npm test" + ], + "parallel": false, + "cwd": "tests/e2e" } }, "test-ui": {