mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
updating computation power graph
This commit is contained in:
130
Makefile
130
Makefile
@@ -54,160 +54,84 @@ $(BUILDDIR):
|
|||||||
mkdir -p paper/$(BUILDDIR)
|
mkdir -p paper/$(BUILDDIR)
|
||||||
|
|
||||||
.PHONY: pdf.build
|
.PHONY: pdf.build
|
||||||
pdf.build: $(BUILDDIR)
|
pdf.build:
|
||||||
@bash paper/concat_code.sh
|
@$(NX) run paper:build
|
||||||
@cd $(SRCDIR) && \
|
|
||||||
$(LATEXMK) -pdf -jobname=$(JOBNAME) -f \
|
|
||||||
-interaction=nonstopmode -file-line-error \
|
|
||||||
-r ../.latexmkrc \
|
|
||||||
-outdir=../$(BUILDDIR) $(TEX)
|
|
||||||
|
|
||||||
.PHONY: pdf.watch
|
.PHONY: pdf.watch
|
||||||
pdf.watch: $(BUILDDIR)
|
pdf.watch:
|
||||||
@cd $(SRCDIR) && \
|
@$(NX) run paper:watch
|
||||||
$(LATEXMK) -pvc -pdf -jobname=$(JOBNAME) -f \
|
|
||||||
-interaction=nonstopmode -file-line-error \
|
|
||||||
-r ../.latexmkrc \
|
|
||||||
-outdir=../$(BUILDDIR) $(TEX)
|
|
||||||
|
|
||||||
.PHONY: pdf.clean
|
.PHONY: pdf.clean
|
||||||
pdf.clean:
|
pdf.clean:
|
||||||
@cd $(SRCDIR) && \
|
@$(NX) run paper:clean
|
||||||
$(LATEXMK) -C -jobname=$(JOBNAME) -outdir=../$(BUILDDIR) || true
|
|
||||||
rm -rf paper/$(BUILDDIR)/*
|
|
||||||
|
|
||||||
.PHONY: test.backend
|
.PHONY: test.backend
|
||||||
test.backend: $(VENV)
|
test.backend:
|
||||||
$(PYTEST) -v
|
@$(NX) run research:test
|
||||||
|
|
||||||
.PHONY: test.e2e
|
.PHONY: test.e2e
|
||||||
test.e2e:
|
test.e2e:
|
||||||
@cd tests/e2e && npm install
|
@$(NX) run e2e:test
|
||||||
@cd tests/e2e && npx playwright install chromium
|
|
||||||
@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
|
|
||||||
@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
|
|
||||||
@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
|
|
||||||
@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
|
|
||||||
@cd tests/e2e && npm test
|
|
||||||
|
|
||||||
.PHONY: test.all
|
.PHONY: test.all
|
||||||
test.all: test.backend test.e2e
|
test.all:
|
||||||
|
@$(NX) run-many -t test --projects=research,e2e --parallel=1
|
||||||
|
|
||||||
.PHONY: web.dev
|
.PHONY: web.dev
|
||||||
web.dev:
|
web.dev:
|
||||||
@cd web && npm install && npm run dev
|
@$(NX) run web:dev
|
||||||
|
|
||||||
$(VENV):
|
$(VENV):
|
||||||
python3 -m venv $(VENV)
|
python3 -m venv $(VENV)
|
||||||
$(PIP) install --upgrade pip
|
$(PIP) install --upgrade pip
|
||||||
|
|
||||||
.PHONY: install
|
.PHONY: install
|
||||||
install: $(VENV)
|
install:
|
||||||
$(PIP) install -r requirements.txt
|
@$(NX) run research:install
|
||||||
|
|
||||||
.PHONY: train
|
.PHONY: train
|
||||||
train: install
|
train:
|
||||||
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
|
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train
|
||||||
@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
|
|
||||||
$(PYTHON) -m engine.train $(LOCAL_TRAIN_ARGS)
|
|
||||||
|
|
||||||
.PHONY: train.agent
|
.PHONY: train.agent
|
||||||
train.agent: install
|
train.agent:
|
||||||
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
|
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-agent
|
||||||
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
|
|
||||||
@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" \
|
|
||||||
$(PYTHON) -m engine.train --sweep-agent --sweep-id "$(SWEEP_ID)" \
|
|
||||||
$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
|
|
||||||
|
|
||||||
.PHONY: train.bootstrap
|
.PHONY: train.bootstrap
|
||||||
train.bootstrap:
|
train.bootstrap:
|
||||||
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
|
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap
|
||||||
@$(SWEEP_ENV_LOAD); test -n "$$GITHUB_TOKEN" || (echo "GITHUB_TOKEN required — set it in $(SWEEP_ENV_FILE)" && exit 1)
|
|
||||||
@test -n "$(REPO_URL)" || (echo "REPO_URL required, e.g. REPO_URL=https://github.com/org/repo.git" && exit 1)
|
|
||||||
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
|
|
||||||
@$(SWEEP_ENV_LOAD); \
|
|
||||||
WANDB_API_KEY="$$WANDB_API_KEY" \
|
|
||||||
WANDB_ENTITY="$(WANDB_ENTITY)" \
|
|
||||||
WANDB_PROJECT="$(WANDB_PROJECT)" \
|
|
||||||
GITHUB_TOKEN="$$GITHUB_TOKEN" \
|
|
||||||
REPO_URL="$(REPO_URL)" \
|
|
||||||
BRANCH="$(BRANCH)" \
|
|
||||||
WORKDIR="$(WORKDIR)" \
|
|
||||||
SWEEP_ID="$(SWEEP_ID)" \
|
|
||||||
AGENT_COUNT="$(AGENT_COUNT)" \
|
|
||||||
AGENT_LOOP="$(AGENT_LOOP)" \
|
|
||||||
RETRY_SECONDS="$(RETRY_SECONDS)" \
|
|
||||||
bash scripts/wandb_agent_bootstrap.sh
|
|
||||||
|
|
||||||
.PHONY: stats.lines
|
.PHONY: stats.lines
|
||||||
stats.lines:
|
stats.lines:
|
||||||
@find . \( -path '*/node_modules' -o -path '*/.venv' -o -path '*/venv' \) -prune -o \
|
@$(NX) run research:stats
|
||||||
\( -name "*.ts" -o -name "*.py" \) -type f -print0 | xargs -0 cat | wc -l
|
|
||||||
|
|
||||||
.PHONY: wordcount
|
.PHONY: wordcount
|
||||||
wordcount:
|
wordcount:
|
||||||
@echo "Counting words in main text (excluding appendix)..."
|
@$(NX) run paper:wordcount
|
||||||
@texcount -nosub -total -sum -1 \
|
|
||||||
$(SRCDIR)/chapters/01-intro.tex \
|
|
||||||
$(SRCDIR)/chapters/02-literature-review.tex \
|
|
||||||
$(SRCDIR)/chapters/03-methodology.tex \
|
|
||||||
$(SRCDIR)/chapters/04-results.tex \
|
|
||||||
$(SRCDIR)/chapters/05-discussion.tex \
|
|
||||||
$(SRCDIR)/chapters/06-conclusion.tex
|
|
||||||
|
|
||||||
.PHONY: docker.train.publish
|
.PHONY: docker.train.publish
|
||||||
docker.train.publish:
|
docker.train.publish:
|
||||||
docker build -f docker/Trainer.dockerfile --target gpu -t $(TRAIN_IMAGE_REF):gpu-latest .
|
@TRAIN_IMAGE_REF="$(TRAIN_IMAGE_REF)" $(NX) run research:docker-train-publish
|
||||||
docker push $(TRAIN_IMAGE_REF):gpu-latest
|
|
||||||
docker build -f docker/Trainer.dockerfile --target tpu -t $(TRAIN_IMAGE_REF):tpu-latest .
|
|
||||||
docker push $(TRAIN_IMAGE_REF):tpu-latest
|
|
||||||
|
|
||||||
.PHONY: train.tpu.pod
|
.PHONY: train.tpu.pod
|
||||||
train.tpu.pod:
|
train.tpu.pod:
|
||||||
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
|
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-pod
|
||||||
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=entity/project/id" && exit 1)
|
|
||||||
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
|
|
||||||
gcloud compute tpus tpu-vm scp scripts/tpu_pod_run.sh $(TPU_NAME):/tmp/tpu_pod_run.sh \
|
|
||||||
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
|
|
||||||
@$(SWEEP_ENV_LOAD); \
|
|
||||||
gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
|
|
||||||
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
|
|
||||||
--command="WANDB_API_KEY='$$WANDB_API_KEY' SWEEP_ID='$(SWEEP_ID)' AGENT_COUNT='$(AGENT_COUNT)' sh /tmp/tpu_pod_run.sh"
|
|
||||||
|
|
||||||
.PHONY: train.tpu.vm.prepare
|
.PHONY: train.tpu.vm.prepare
|
||||||
train.tpu.vm.prepare:
|
train.tpu.vm.prepare:
|
||||||
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
|
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" $(NX) run research:train-tpu-vm-prepare
|
||||||
TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" \
|
|
||||||
LOCAL_REPO_DIR="$(CURDIR)" REMOTE_REPO_DIR="$(TPU_REPO_DIR)" \
|
|
||||||
sh scripts/tpu_sync_repo.sh
|
|
||||||
gcloud compute tpus tpu-vm scp scripts/tpu_vm_train.sh $(TPU_NAME):/tmp/tpu_vm_train.sh \
|
|
||||||
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all
|
|
||||||
|
|
||||||
.PHONY: train.tpu.vm.run
|
.PHONY: train.tpu.vm.run
|
||||||
train.tpu.vm.run:
|
train.tpu.vm.run:
|
||||||
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
|
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm-run
|
||||||
@test -n "$(LOCAL_TRAIN_ARGS)" || (echo "LOCAL_TRAIN_ARGS required, e.g. --algo ppo --jax --total-timesteps 200000" && exit 1)
|
|
||||||
@$(SWEEP_ENV_LOAD); \
|
|
||||||
gcloud compute tpus tpu-vm ssh $(TPU_NAME) \
|
|
||||||
--zone=$(TPU_ZONE) --project=$(TPU_PROJECT) --worker=all \
|
|
||||||
--command="REPO_DIR='$(TPU_REPO_DIR)' TRAIN_ARGS='$(LOCAL_TRAIN_ARGS)' WANDB_API_KEY='$$WANDB_API_KEY' sh /tmp/tpu_vm_train.sh"
|
|
||||||
|
|
||||||
.PHONY: train.tpu.vm
|
.PHONY: train.tpu.vm
|
||||||
train.tpu.vm: train.tpu.vm.prepare train.tpu.vm.run
|
train.tpu.vm:
|
||||||
|
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" LOCAL_TRAIN_ARGS="$(LOCAL_TRAIN_ARGS)" $(NX) run research:train-tpu-vm
|
||||||
|
|
||||||
.PHONY: train.tpu.vm.sweep
|
.PHONY: train.tpu.vm.sweep
|
||||||
train.tpu.vm.sweep:
|
train.tpu.vm.sweep:
|
||||||
@test -n "$(TPU_NAME)" || (echo "TPU_NAME required, e.g. TPU_NAME=TPUlong" && exit 1)
|
@TPU_NAME="$(TPU_NAME)" TPU_ZONE="$(TPU_ZONE)" TPU_PROJECT="$(TPU_PROJECT)" TPU_REPO_DIR="$(TPU_REPO_DIR)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" $(NX) run research:train-tpu-vm-sweep
|
||||||
@test -n "$(SWEEP_ID)" || (echo "SWEEP_ID required, e.g. SWEEP_ID=lusiana/phantom-pricing/abc123" && exit 1)
|
|
||||||
@$(SWEEP_ENV_LOAD); test -n "$$WANDB_API_KEY" || (echo "WANDB_API_KEY required — set it in $(SWEEP_ENV_FILE)" && exit 1)
|
|
||||||
@$(SWEEP_ENV_LOAD); WANDB_API_KEY="$$WANDB_API_KEY" \
|
|
||||||
python3 scripts/tpu_vm_sweep_agent.py \
|
|
||||||
--sweep-id "$(SWEEP_ID)" \
|
|
||||||
--tpu-name "$(TPU_NAME)" \
|
|
||||||
--tpu-zone "$(TPU_ZONE)" \
|
|
||||||
--tpu-project "$(TPU_PROJECT)" \
|
|
||||||
--tpu-repo-dir "$(TPU_REPO_DIR)" \
|
|
||||||
$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
|
|
||||||
|
|
||||||
.PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs
|
.PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs
|
||||||
backend.server:
|
backend.server:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"install": {
|
"install": {
|
||||||
"executor": "nx:run-commands",
|
"executor": "nx:run-commands",
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make install",
|
"command": "bash scripts/nx_research.sh install",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
"install"
|
"install"
|
||||||
],
|
],
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make test.backend",
|
"command": ".venv/bin/pytest -v",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -27,14 +27,76 @@
|
|||||||
"install"
|
"install"
|
||||||
],
|
],
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make train",
|
"command": "bash scripts/nx_research.sh train",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-agent": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"dependsOn": [
|
||||||
|
"install"
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-agent",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-bootstrap": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-bootstrap",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"executor": "nx:run-commands",
|
"executor": "nx:run-commands",
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make stats.lines",
|
"command": "bash scripts/nx_research.sh stats",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"docker-train-publish": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh docker-train-publish",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-tpu-pod": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-tpu-pod",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-tpu-vm-prepare": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-tpu-vm-prepare",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-tpu-vm-run": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-tpu-vm-run",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-tpu-vm": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"dependsOn": [
|
||||||
|
"train-tpu-vm-prepare"
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-tpu-vm-run",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"train-tpu-vm-sweep": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_research.sh train-tpu-vm-sweep",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,21 +10,28 @@
|
|||||||
"{projectRoot}/build"
|
"{projectRoot}/build"
|
||||||
],
|
],
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make pdf.build",
|
"command": "bash scripts/nx_paper.sh build",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"watch": {
|
"watch": {
|
||||||
"executor": "nx:run-commands",
|
"executor": "nx:run-commands",
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make pdf.watch",
|
"command": "bash scripts/nx_paper.sh watch",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"clean": {
|
"clean": {
|
||||||
"executor": "nx:run-commands",
|
"executor": "nx:run-commands",
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make pdf.clean",
|
"command": "bash scripts/nx_paper.sh clean",
|
||||||
|
"cwd": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"wordcount": {
|
||||||
|
"executor": "nx:run-commands",
|
||||||
|
"options": {
|
||||||
|
"command": "bash scripts/nx_paper.sh wordcount",
|
||||||
"cwd": "."
|
"cwd": "."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -210,8 +210,7 @@ The simulator has multiple configurable factors. We design a multi-factor study
|
|||||||
% Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n=18 per group. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
|
% Power analysis plan: apply a two-sample Mann-Whitney U (or permutation test) on per-session (delta_H - delta_A) divergence scores comparing the human and agent groups. Compute minimum detectable effect size at alpha=0.05, power=0.8, given n=18 per group. Bootstrap confidence intervals on mean KL are a cleaner complement given the non-normality of divergence distributions.
|
||||||
While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.
|
While this scale is generally expensive for reinforcement learning, we execute it on a large TPU cluster to make the sweep tractable.
|
||||||
|
|
||||||
% TODO: cite in the appendix the math to get to 160 petaflops of compute
|
Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160\,PFLOPS of aggregate compute (derivation in Appendix~\ref{app:compute_budget}), which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
|
||||||
Our training budget is provisioned through TPU Research Cloud and spans 384 chips across TPU v4, v5e, and v6e generations, with a spot-heavy allocation plus an on-demand reserve. At peak BF16 throughput this corresponds to approximately 160 PFLOPS of aggregate compute, which makes repeated seeds, ablations, and sensitivity sweeps feasible within practical wall-clock limits. We allocate v6e capacity to the highest-intensity policy training jobs, use v5e for wider hyperparameter exploration where throughput-per-dollar is favorable, and reserve on-demand v4 capacity for runs that should not be interrupted.
|
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
|
|||||||
@@ -53,6 +53,31 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
|
|||||||
\item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
|
\item[Trajectory] Defined as a series of unspecified length, collecting data on states of some object over time.
|
||||||
% TODO: maybe define other things in a similar succinct manner
|
% TODO: maybe define other things in a similar succinct manner
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
\section{Aggregate Compute Budget Derivation}
|
||||||
|
\label{app:compute_budget}
|
||||||
|
|
||||||
|
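% TODO(review): the table below sums to 320 chips (128 + 128 + 64), while the methodology section states the allocation spans 384 chips; reconcile the two figures or explain which chips are excluded from the peak-throughput sum.
The claimed peak throughput of approximately 160\,PFLOPS follows from multiplying the per-chip BF16 peak (from official Google Cloud TPU documentation) by the number of chips in each allocation tier and summing across generations.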
|
||||||
|
|
||||||
|
\begin{table}[ht]
|
||||||
|
\centering
|
||||||
|
\caption{Per-generation contribution to aggregate BF16 throughput.}
|
||||||
|
\label{tab:compute_derivation}
|
||||||
|
\begin{tabular}{@{}lrrr@{}}
|
||||||
|
\toprule
|
||||||
|
\textbf{TPU Gen.} & \textbf{Chips} & \textbf{Peak BF16/chip (TFLOPS)} & \textbf{Subtotal (TFLOPS)} \\
|
||||||
|
\midrule
|
||||||
|
v6e (Trillium) & 128 & 918 & $128 \times 918 = 117{,}504$ \\
|
||||||
|
v5e & 128 & 197 & $128 \times 197 = 25{,}216$ \\
|
||||||
|
v4 & 64 & 275 & $64 \times 275 = 17{,}600$ \\
|
||||||
|
\midrule
|
||||||
|
\textbf{Total} & \textbf{320} & & $\mathbf{160{,}320}$ \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
Converting to petaFLOPS: $160{,}320\;\text{TFLOPS} = 160.32\;\text{PFLOPS} \approx 160\;\text{PFLOPS}$. This is the theoretical peak under sustained BF16 arithmetic; realized throughput depends on memory bandwidth utilization and inter-chip communication overhead, but the figure serves as a useful upper bound for provisioning decisions.
|
||||||
|
|
||||||
% \input{../build/concatenated_code}
|
% \input{../build/concatenated_code}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|||||||
@@ -13,12 +13,23 @@
|
|||||||
},
|
},
|
||||||
"test": {
|
"test": {
|
||||||
"executor": "nx:run-commands",
|
"executor": "nx:run-commands",
|
||||||
|
"dependsOn": [
|
||||||
|
"install"
|
||||||
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
"{projectRoot}/test-results"
|
"{projectRoot}/test-results"
|
||||||
],
|
],
|
||||||
"options": {
|
"options": {
|
||||||
"command": "make test.e2e",
|
"commands": [
|
||||||
"cwd": "."
|
"npx playwright install chromium",
|
||||||
|
"test -f .env || cp .env.example .env",
|
||||||
|
"timeout 30 bash -c \"until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Backend not ready' && exit 1)",
|
||||||
|
"timeout 30 bash -c \"until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done\" || (echo 'Web app not ready' && exit 1)",
|
||||||
|
"timeout 30 bash -c \"until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done\" || (echo 'Airflow not ready' && exit 1)",
|
||||||
|
"npm test"
|
||||||
|
],
|
||||||
|
"parallel": false,
|
||||||
|
"cwd": "tests/e2e"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"test-ui": {
|
"test-ui": {
|
||||||
|
|||||||
Reference in New Issue
Block a user