updating computation power graph

2026-07-15 17:43:36 +00:00 · 2026-03-08 14:22:54 +01:00
parent 17c128cbc0
commit 28dbcacd95
6 changed files with 142 additions and 114 deletions
--- a/130
+++ b/130
@@ -54,160 +54,84 @@ $(BUILDDIR):
 	mkdir -p paper/$(BUILDDIR)

 .PHONY: pdf.build
-pdf.build: $(BUILDDIR)
-	@bash paper/concat_code.sh
-	@cd $(SRCDIR) && \
-	$(LATEXMK) -pdf -jobname=$(JOBNAME) -f \
-		-interaction=nonstopmode -file-line-error \
-		-r ../.latexmkrc \
-		-outdir=../$(BUILDDIR) $(TEX)
+pdf.build:
+	@$(NX) run paper:build

 .PHONY: pdf.watch
-pdf.watch: $(BUILDDIR)
-	@cd $(SRCDIR) && \
-	$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \
-		-interaction=nonstopmode -file-line-error \
-		-r ../.latexmkrc \
-		-outdir=../$(BUILDDIR) $(TEX)
+pdf.watch:
+	@$(NX) run paper:watch

 .PHONY: pdf.clean
 pdf.clean:
-	@cd $(SRCDIR) && \
-	$(LATEXMK) -C -jobname=$(JOBNAME) -outdir=../$(BUILDDIR) || true
-	rm -rf paper/$(BUILDDIR)/*
+	@$(NX) run paper:clean

 .PHONY: test.backend
-test.backend: $(VENV)
-	$(PYTEST) -v
+test.backend:
+	@$(NX) run research:test

 .PHONY: test.e2e
 test.e2e:
-	@cd tests/e2e && npm install
-	@cd tests/e2e && npx playwright install chromium
-	@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
-	@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
-	@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
-	@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
-	@cd tests/e2e && npm test
+	@$(NX) run e2e:test

 .PHONY: test.all
-test.all: test.backend test.e2e
+test.all:
+	@$(NX) run-many -t test --projects=research,e2e --parallel=1

 .PHONY: web.dev
 web.dev:
-	@cd web && npm install && npm run dev
+	@$(NX) run web:dev

 $(VENV):
 	python3 -m venv $(VENV)
 	$(PIP) install --upgrade pip

 .PHONY: install
-install: $(VENV)
-	$(PIP) install -r requirements.txt
+install:
+	@$(NX) run research:install

 .PHONY: train
-train: install
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
-	@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
-		$(PYTHON) -m engine.train $(LOCAL_TRAIN_ARGS)
+train:
+	@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train

 .PHONY: train.agent
-train.agent: install
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
-	@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
-	@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
-		$(PYTHON) -m engine.train --sweep-agent --sweep-id "$(SWEEP_ID)" \
-		$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
+train.agent:
+	@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-agent

 .PHONY: train.bootstrap
 train.bootstrap:
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
-	@$(SWEEP_ENV_LOAD); test -n "$$GITHUB_TOKEN" || (echo "GITHUB_TOKEN required — set it in $(SWEEP_ENV_FILE)" && exit 1)
-	@test -n "$(REPO_URL)" || (echo "REPO_URL required, e.g. REPO_URL=https://github.com/org/repo.git" && exit 1)
-	@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
-	@$(SWEEP_ENV_LOAD); \
-		WANDB_API_KEY="$$WANDB_API_KEY" \
-		WANDB_ENTITY="$(WANDB_ENTITY)" \
-		WANDB_PROJECT="$(WANDB_PROJECT)" \
-		GITHUB_TOKEN="$$GITHUB_TOKEN" \
-		REPO_URL="$(REPO_URL)" \
-		BRANCH="$(BRANCH)" \
-		WORKDIR="$(WORKDIR)" \
-		SWEEP_ID="$(SWEEP_ID)" \
-		AGENT_COUNT="$(AGENT_COUNT)" \
-		AGENT_LOOP="$(AGENT_LOOP)" \
-		RETRY_SECONDS="$(RETRY_SECONDS)" \
-		bash scripts/wandb_agent_bootstrap.sh
+	@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap

 .PHONY: stats.lines
 stats.lines:
-	@find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \
-	\( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l
+	@$(NX) run research:stats

 .PHONY: wordcount
 wordcount:
-	@echo "Counting words in main text (excluding appendix)..."
-	@texcount -nosub -total -sum -1 \
-		$(SRCDIR)/chapters/01-intro.tex \
-		$(SRCDIR)/chapters/02-literature-review.tex \
-		$(SRCDIR)/chapters/03-methodology.tex \
-		$(SRCDIR)/chapters/04-results.tex \
-		$(SRCDIR)/chapters/05-discussion.tex \
-		$(SRCDIR)/chapters/06-conclusion.tex
+	@$(NX) run paper:wordcount

 .PHONY: docker.train.publish
 docker.train.publish:
-	docker build -f docker/Trainer.dockerfile --target gpu -t $(TRAIN_IMAGE_REF):gpu-latest .
-	docker push $(TRAIN_IMAGE_REF):gpu-latest
-	docker build -f docker/Trainer.dockerfile --target tpu -t $(TRAIN_IMAGE_REF):tpu-latest .
-	docker push $(TRAIN_IMAGE_REF):tpu-latest
+	@TRAIN_IMAGE_REF="$(TRAIN_IMAGE_REF)" $(NX) run research:docker-train-publish

 .PHONY: train.tpu.pod
 train.tpu.pod:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
-	@test -n "$(SWEEP_ID)"  || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
-	gcloud compute tpus tpu-vm scp scripts/tpu_pod_run.sh $(TPU_NAME):/tmp/tpu_pod_run.sh \
-		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
-	@$(SWEEP_ENV_LOAD); \
-		gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
-		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
-		--command="WANDB_API_KEY='$$WANDB_API_KEY' SWEEP_ID='$(SWEEP_ID)' AGENT_COUNT='$(AGENT_COUNT)' sh /tmp/tpu_pod_run.sh"
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-pod

 .PHONY: train.tpu.vm.prepare
 train.tpu.vm.prepare:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
-	TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" \
-		LOCAL_REPO_DIR="$(CURDIR)" REMOTE_REPO_DIR="$(TPU_REPO_DIR)" \
-		sh scripts/tpu_sync_repo.sh
-	gcloud compute tpus tpu-vm scp scripts/tpu_vm_train.sh $(TPU_NAME):/tmp/tpu_vm_train.sh \
-		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" $(NX) run research:train-tpu-vm-prepare

 .PHONY: train.tpu.vm.run
 train.tpu.vm.run:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
-	@test -n "$(LOCAL_TRAIN_ARGS)" || (echo "LOCAL_TRAIN_ARGS required, e.g. --algo ppo --jax --total-timesteps 200000" && exit 1)
-	@$(SWEEP_ENV_LOAD); \
-		gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
-		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
-		--command="REPO_DIR='$(TPU_REPO_DIR)' TRAIN_ARGS='$(LOCAL_TRAIN_ARGS)' WANDB_API_KEY='$$WANDB_API_KEY' sh /tmp/tpu_vm_train.sh"
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm-run

 .PHONY: train.tpu.vm
-train.tpu.vm: train.tpu.vm.prepare train.tpu.vm.run
+train.tpu.vm:
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm

 .PHONY: train.tpu.vm.sweep
 train.tpu.vm.sweep:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
-	@test -n "$(SWEEP_ID)"  || (echo "SWEEP_ID required, e.g. SWEEP_ID=lusiana/phantom-pricing/abc123" && exit 1)
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
-	@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" \
-		python3 scripts/tpu_vm_sweep_agent.py \
-		--sweep-id "$(SWEEP_ID)" \
-		--tpu-name "$(TPU_NAME)" \
-		--tpu-zone "$(TPU_ZONE)" \
-		--tpu-project "$(TPU_PROJECT)" \
-		--tpu-repo-dir "$(TPU_REPO_DIR)" \
-		$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-vm-sweep

 .PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs
 backend.server:
--- a/engine/project.json
+++ b/engine/project.json
@@ -7,7 +7,7 @@
    "install": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make install",
+        "command": "bash scripts/nx_research.sh install",
        "cwd": "."
      }
    },
@@ -17,7 +17,7 @@
        "install"
      ],
      "options": {
-        "command": "make test.backend",
+        "command": ".venv/bin/pytest -v",
        "cwd": "."
      }
    },
@@ -27,14 +27,76 @@
        "install"
      ],
      "options": {
-        "command": "make train",
+        "command": "bash scripts/nx_research.sh train",
+        "cwd": "."
+      }
+    },
+    "train-agent": {
+      "executor": "nx:run-commands",
+      "dependsOn": [
+        "install"
+      ],
+      "options": {
+        "command": "bash scripts/nx_research.sh train-agent",
+        "cwd": "."
+      }
+    },
+    "train-bootstrap": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_research.sh train-bootstrap",
        "cwd": "."
      }
    },
    "stats": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make stats.lines",
+        "command": "bash scripts/nx_research.sh stats",
+        "cwd": "."
+      }
+    },
+    "docker-train-publish": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_research.sh docker-train-publish",
+        "cwd": "."
+      }
+    },
+    "train-tpu-pod": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_research.sh train-tpu-pod",
+        "cwd": "."
+      }
+    },
+    "train-tpu-vm-prepare": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_research.sh train-tpu-vm-prepare",
+        "cwd": "."
+      }
+    },
+    "train-tpu-vm-run": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_research.sh train-tpu-vm-run",
+        "cwd": "."
+      }
+    },
+    "train-tpu-vm": {
+      "executor": "nx:run-commands",
+      "dependsOn": [
+        "train-tpu-vm-prepare"
+      ],
+      "options": {
+        "command": "bash scripts/nx_research.sh train-tpu-vm-run",
+        "cwd": "."
+      }
+    },
+    "train-tpu-vm-sweep": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_research.sh train-tpu-vm-sweep",
        "cwd": "."
      }
    }
--- a/paper/project.json
+++ b/paper/project.json
@@ -10,21 +10,28 @@
        "{projectRoot}/build"
      ],
      "options": {
-        "command": "make pdf.build",
+        "command": "bash scripts/nx_paper.sh build",
        "cwd": "."
      }
    },
    "watch": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make pdf.watch",
+        "command": "bash scripts/nx_paper.sh watch",
        "cwd": "."
      }
    },
    "clean": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make pdf.clean",
+        "command": "bash scripts/nx_paper.sh clean",
+        "cwd": "."
+      }
+    },
+    "wordcount": {
+      "executor": "nx:run-commands",
+      "options": {
+        "command": "bash scripts/nx_paper.sh wordcount",
        "cwd": "."
      }
    }
--- a/paper/src/chapters/03-methodology.tex
+++ b/paper/src/chapters/03-methodology.tex
@@ -210,8 +210,7 @@ The simulator has multiple configurable factors. We design a multi-factor study
 % Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n=18 per group. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
 While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.

-% TODO: cite in the apendix the math to get to 160 petaflops of compute
-Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160 PFLOPS of aggregate compute, which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
+Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.

 \begin{table}[ht]
 \centering
--- a/paper/src/main.tex
+++ b/paper/src/main.tex
@@ -53,6 +53,31 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
 \item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
 % TODO: maybe define other things in a similar succient manner
 \end{description}
+
+\section{Aggregate Compute Budget Derivation}
+\label{app:compute_budget}
+
+The claimed peak throughput of approximately 160\,PFLOPS follows from multiplying the per-chip BF16 peak (from official Google Cloud TPU documentation) by the number of chips in each allocation tier and summing across generations.
+
+\begin{table}[ht]
+\centering
+\caption{Per-generation contribution to aggregate BF16 throughput.}
+\label{tab:compute_derivation}
+\begin{tabular}{@{}lrrr@{}}
+\toprule
+\textbf{TPU Gen.} & \textbf{Chips} & \textbf{Peak BF16/chip (TFLOPS)} & \textbf{Subtotal (TFLOPS)} \\
+\midrule
+v6e (Trillium) & 128 & 918 & $128 \times 918 = 117{,}504$ \\
+v5e            & 128 & 197 & $128 \times 197 = 25{,}216$  \\
+v4             &  64 & 275 & $64  \times 275 = 17{,}600$  \\
+\midrule
+\textbf{Total} & \textbf{320} & & $\mathbf{160{,}320}$ \\
+\bottomrule
+\end{tabular}
+\end{table}
+
+Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
+
 % \input{../build/concatenated_code}

 \end{document}
--- a/tests/e2e/project.json
+++ b/tests/e2e/project.json
@@ -13,12 +13,23 @@
    },
    "test": {
      "executor": "nx:run-commands",
+      "dependsOn": [
+        "install"
+      ],
      "outputs": [
        "{projectRoot}/test-results"
      ],
      "options": {
-        "command": "make test.e2e",
-        "cwd": "."
+        "commands": [
+          "npx playwright install chromium",
+          "test -f .env || cp .env.example .env",
+          "timeout 30 bash -c \"until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Backend not ready' && exit 1)",
+          "timeout 30 bash -c \"until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done\" || (echo 'Web app not ready' && exit 1)",
+          "timeout 30 bash -c \"until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Airflow not ready' && exit 1)",
+          "npm test"
+        ],
+        "parallel": false,
+        "cwd": "tests/e2e"
      }
    },
    "test-ui": {