updating computation power graph

2026-07-15 17:43:36 +00:00 · 2026-03-08 14:22:54 +01:00
parent 17c128cbc0
commit 28dbcacd95
6 changed files with 142 additions and 114 deletions
--- a/130
+++ b/130
@@ -54,160 +54,84 @@ $(BUILDDIR):
 	mkdir -p paper/$(BUILDDIR)
 .PHONY: pdf.build
-pdf.build: $(BUILDDIR)
+pdf.build:
-	@bash paper/concat_code.sh
+	@$(NX) run paper:build
 	@cd $(SRCDIR) && \
 	$(LATEXMK) -pdf -jobname=$(JOBNAME) -f \
 		-interaction=nonstopmode -file-line-error \
 		-r ../.latexmkrc \
 		-outdir=../$(BUILDDIR) $(TEX)
 .PHONY: pdf.watch
-pdf.watch: $(BUILDDIR)
+pdf.watch:
-	@cd $(SRCDIR) && \
+	@$(NX) run paper:watch
 	$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \
 		-interaction=nonstopmode -file-line-error \
 		-r ../.latexmkrc \
 		-outdir=../$(BUILDDIR) $(TEX)
 .PHONY: pdf.clean
 pdf.clean:
-	@cd $(SRCDIR) && \
+	@$(NX) run paper:clean
 	$(LATEXMK) -C -jobname=$(JOBNAME) -outdir=../$(BUILDDIR) || true
 	rm -rf paper/$(BUILDDIR)/*
 .PHONY: test.backend
-test.backend: $(VENV)
+test.backend:
-	$(PYTEST) -v
+	@$(NX) run research:test
 .PHONY: test.e2e
 test.e2e:
-	@cd tests/e2e && npm install
+	@$(NX) run e2e:test
 	@cd tests/e2e && npx playwright install chromium
 	@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
 	@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
 	@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
 	@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
 	@cd tests/e2e && npm test
 .PHONY: test.all
-test.all: test.backend test.e2e
+test.all:
 	@$(NX) run-many -t test --projects=research,e2e --parallel=1
 .PHONY: web.dev
 web.dev:
-	@cd web && npm install && npm run dev
+	@$(NX) run web:dev
 $(VENV):
 	python3 -m venv $(VENV)
 	$(PIP) install --upgrade pip
 .PHONY: install
-install: $(VENV)
+install:
-	$(PIP) install -r requirements.txt
+	@$(NX) run research:install
 .PHONY: train
-train: install
+train:
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
+	@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train
 	@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
 		$(PYTHON) -m engine.train $(LOCAL_TRAIN_ARGS)
 .PHONY: train.agent
-train.agent: install
+train.agent:
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
+	@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-agent
 	@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
 	@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
 		$(PYTHON) -m engine.train --sweep-agent --sweep-id "$(SWEEP_ID)" \
 		$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
 .PHONY: train.bootstrap
 train.bootstrap:
-	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
+	@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap
 	@$(SWEEP_ENV_LOAD); test -n "$$GITHUB_TOKEN" || (echo "GITHUB_TOKEN required — set it in $(SWEEP_ENV_FILE)" && exit 1)
 	@test -n "$(REPO_URL)" || (echo "REPO_URL required, e.g. REPO_URL=https://github.com/org/repo.git" && exit 1)
 	@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
 	@$(SWEEP_ENV_LOAD); \
 		WANDB_API_KEY="$$WANDB_API_KEY" \
 		WANDB_ENTITY="$(WANDB_ENTITY)" \
 		WANDB_PROJECT="$(WANDB_PROJECT)" \
 		GITHUB_TOKEN="$$GITHUB_TOKEN" \
 		REPO_URL="$(REPO_URL)" \
 		BRANCH="$(BRANCH)" \
 		WORKDIR="$(WORKDIR)" \
 		SWEEP_ID="$(SWEEP_ID)" \
 		AGENT_COUNT="$(AGENT_COUNT)" \
 		AGENT_LOOP="$(AGENT_LOOP)" \
 		RETRY_SECONDS="$(RETRY_SECONDS)" \
 		bash scripts/wandb_agent_bootstrap.sh
 .PHONY: stats.lines
 stats.lines:
-	@find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \
+	@$(NX) run research:stats
 	\( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l
 .PHONY: wordcount
 wordcount:
-	@echo "Counting words in main text (excluding appendix)..."
+	@$(NX) run paper:wordcount
 	@texcount -nosub -total -sum -1 \
 		$(SRCDIR)/chapters/01-intro.tex \
 		$(SRCDIR)/chapters/02-literature-review.tex \
 		$(SRCDIR)/chapters/03-methodology.tex \
 		$(SRCDIR)/chapters/04-results.tex \
 		$(SRCDIR)/chapters/05-discussion.tex \
 		$(SRCDIR)/chapters/06-conclusion.tex
 .PHONY: docker.train.publish
 docker.train.publish:
-	docker build -f docker/Trainer.dockerfile --target gpu -t $(TRAIN_IMAGE_REF):gpu-latest .
+	@TRAIN_IMAGE_REF="$(TRAIN_IMAGE_REF)" $(NX) run research:docker-train-publish
 	docker push $(TRAIN_IMAGE_REF):gpu-latest
 	docker build -f docker/Trainer.dockerfile --target tpu -t $(TRAIN_IMAGE_REF):tpu-latest .
 	docker push $(TRAIN_IMAGE_REF):tpu-latest
 .PHONY: train.tpu.pod
 train.tpu.pod:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-pod
 	@test -n "$(SWEEP_ID)"  || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
 	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
 	gcloud compute tpus tpu-vm scp scripts/tpu_pod_run.sh $(TPU_NAME):/tmp/tpu_pod_run.sh \
 		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
 	@$(SWEEP_ENV_LOAD); \
 		gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
 		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
 		--command="WANDB_API_KEY='$$WANDB_API_KEY' SWEEP_ID='$(SWEEP_ID)' AGENT_COUNT='$(AGENT_COUNT)' sh /tmp/tpu_pod_run.sh"
 .PHONY: train.tpu.vm.prepare
 train.tpu.vm.prepare:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" $(NX) run research:train-tpu-vm-prepare
 	TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" \
 		LOCAL_REPO_DIR="$(CURDIR)" REMOTE_REPO_DIR="$(TPU_REPO_DIR)" \
 		sh scripts/tpu_sync_repo.sh
 	gcloud compute tpus tpu-vm scp scripts/tpu_vm_train.sh $(TPU_NAME):/tmp/tpu_vm_train.sh \
 		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
 .PHONY: train.tpu.vm.run
 train.tpu.vm.run:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm-run
 	@test -n "$(LOCAL_TRAIN_ARGS)" || (echo "LOCAL_TRAIN_ARGS required, e.g. --algo ppo --jax --total-timesteps 200000" && exit 1)
 	@$(SWEEP_ENV_LOAD); \
 		gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
 		--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
 		--command="REPO_DIR='$(TPU_REPO_DIR)' TRAIN_ARGS='$(LOCAL_TRAIN_ARGS)' WANDB_API_KEY='$$WANDB_API_KEY' sh /tmp/tpu_vm_train.sh"
 .PHONY: train.tpu.vm
-train.tpu.vm: train.tpu.vm.prepare train.tpu.vm.run
+train.tpu.vm:
 	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm
 .PHONY: train.tpu.vm.sweep
 train.tpu.vm.sweep:
-	@test -n "$(TPU_NAME)"  || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
+	@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-vm-sweep
 	@test -n "$(SWEEP_ID)"  || (echo "SWEEP_ID required, e.g. SWEEP_ID=lusiana/phantom-pricing/abc123" && exit 1)
 	@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
 	@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" \
 		python3 scripts/tpu_vm_sweep_agent.py \
 		--sweep-id "$(SWEEP_ID)" \
 		--tpu-name "$(TPU_NAME)" \
 		--tpu-zone "$(TPU_ZONE)" \
 		--tpu-project "$(TPU_PROJECT)" \
 		--tpu-repo-dir "$(TPU_REPO_DIR)" \
 		$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
 .PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs
 backend.server:
--- a/engine/project.json
+++ b/engine/project.json
@@ -7,7 +7,7 @@
    "install": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make install",
+        "command": "bash scripts/nx_research.sh install",
        "cwd": "."
      }
    },
@@ -17,7 +17,7 @@
        "install"
      ],
      "options": {
-        "command": "make test.backend",
+        "command": ".venv/bin/pytest -v",
        "cwd": "."
      }
    },
@@ -27,14 +27,76 @@
        "install"
      ],
      "options": {
-        "command": "make train",
+        "command": "bash scripts/nx_research.sh train",
        "cwd": "."
      }
    },
    "train-agent": {
      "executor": "nx:run-commands",
      "dependsOn": [
        "install"
      ],
      "options": {
        "command": "bash scripts/nx_research.sh train-agent",
        "cwd": "."
      }
    },
    "train-bootstrap": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_research.sh train-bootstrap",
        "cwd": "."
      }
    },
    "stats": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make stats.lines",
+        "command": "bash scripts/nx_research.sh stats",
        "cwd": "."
      }
    },
    "docker-train-publish": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_research.sh docker-train-publish",
        "cwd": "."
      }
    },
    "train-tpu-pod": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_research.sh train-tpu-pod",
        "cwd": "."
      }
    },
    "train-tpu-vm-prepare": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_research.sh train-tpu-vm-prepare",
        "cwd": "."
      }
    },
    "train-tpu-vm-run": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_research.sh train-tpu-vm-run",
        "cwd": "."
      }
    },
    "train-tpu-vm": {
      "executor": "nx:run-commands",
      "dependsOn": [
        "train-tpu-vm-prepare"
      ],
      "options": {
        "command": "bash scripts/nx_research.sh train-tpu-vm-run",
        "cwd": "."
      }
    },
    "train-tpu-vm-sweep": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_research.sh train-tpu-vm-sweep",
        "cwd": "."
      }
    }
--- a/paper/project.json
+++ b/paper/project.json
@@ -10,21 +10,28 @@
        "{projectRoot}/build"
      ],
      "options": {
-        "command": "make pdf.build",
+        "command": "bash scripts/nx_paper.sh build",
        "cwd": "."
      }
    },
    "watch": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make pdf.watch",
+        "command": "bash scripts/nx_paper.sh watch",
        "cwd": "."
      }
    },
    "clean": {
      "executor": "nx:run-commands",
      "options": {
-        "command": "make pdf.clean",
+        "command": "bash scripts/nx_paper.sh clean",
        "cwd": "."
      }
    },
    "wordcount": {
      "executor": "nx:run-commands",
      "options": {
        "command": "bash scripts/nx_paper.sh wordcount",
        "cwd": "."
      }
    }
--- a/paper/src/chapters/03-methodology.tex
+++ b/paper/src/chapters/03-methodology.tex
@@ -210,8 +210,7 @@ The simulator has multiple configurable factors. We design a multi-factor study
 % Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n=18 per group. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
 While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.
-% TODO: cite in the apendix the math to get to 160 petaflops of compute
+Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
 Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160 PFLOPS of aggregate compute, which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
 \begin{table}[ht]
 \centering
--- a/paper/src/main.tex
+++ b/paper/src/main.tex
@@ -53,6 +53,31 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
 \item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
 % TODO: maybe define other things in a similar succient manner
 \end{description}
 \section{Aggregate Compute Budget Derivation}
 \label{app:compute_budget}
 The claimed peak throughput of approximately 160\,PFLOPS follows from multiplying the per-chip BF16 peak (from official Google Cloud TPU documentation) by the number of chips in each allocation tier and summing across generations.
 \begin{table}[ht]
 \centering
 \caption{Per-generation contribution to aggregate BF16 throughput.}
 \label{tab:compute_derivation}
 \begin{tabular}{@{}lrrr@{}}
 \toprule
 \textbf{TPU Gen.} & \textbf{Chips} & \textbf{Peak BF16/chip (TFLOPS)} & \textbf{Subtotal (TFLOPS)} \\
 \midrule
 v6e (Trillium) & 128 & 918 & $128 \times 918 = 117{,}504$ \\
 v5e            & 128 & 197 & $128 \times 197 = 25{,}216$  \\
 v4             &  64 & 275 & $64  \times 275 = 17{,}600$  \\
 \midrule
 \textbf{Total} & \textbf{320} & & $\mathbf{160{,}320}$ \\
 \bottomrule
 \end{tabular}
 \end{table}
 Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
 % \input{../build/concatenated_code}
 \end{document}
--- a/tests/e2e/project.json
+++ b/tests/e2e/project.json
@@ -13,12 +13,23 @@
    },
    "test": {
      "executor": "nx:run-commands",
      "dependsOn": [
        "install"
      ],
      "outputs": [
        "{projectRoot}/test-results"
      ],
      "options": {
-        "command": "make test.e2e",
+        "commands": [
-        "cwd": "."
+          "npx playwright install chromium",
          "test -f .env || cp .env.example .env",
          "timeout 30 bash -c \"until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Backend not ready' && exit 1)",
          "timeout 30 bash -c \"until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done\" || (echo 'Web app not ready' && exit 1)",
          "timeout 30 bash -c \"until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Airflow not ready' && exit 1)",
          "npm test"
        ],
        "parallel": false,
        "cwd": "tests/e2e"
      }
    },
    "test-ui": {