chore: updating make reference and linking of builds

This commit is contained in:
2026-03-15 21:15:01 +01:00
parent a9c091050c
commit 0521a63937
5 changed files with 93 additions and 2 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
.env.* .env.*
!.env.*.example !.env.*.example
**/.venv **/.venv
**/.venv-ray
# python build/cache artifacts # python build/cache artifacts
**/__pycache__ **/__pycache__

View File

@@ -11,6 +11,7 @@ PYTEST := $(VENV)/bin/pytest
NX := npx nx NX := npx nx
SWEEP_ENV_FILE ?= .env.sweep SWEEP_ENV_FILE ?= .env.sweep
TPU_CONF ?= tpu_orchestration/configs/v4_spot_us.conf
WANDB_ENTITY ?= WANDB_ENTITY ?=
WANDB_PROJECT ?= capstone WANDB_PROJECT ?= capstone
@@ -21,6 +22,14 @@ SIMPLE_BENCHMARK_ARGS ?= --tiers qtable,ppo,dqn,a2c --alpha-values 0.0,0.15,0.3,
BENCHMARK_AGENT_ARGS ?= BENCHMARK_AGENT_ARGS ?=
AGENT_COUNT ?= 0 AGENT_COUNT ?= 0
WHOCLICKED_REPO ?= velocitatem/whoclickedit
WHOCLICKED_CSV ?= experiments/exports/whoclicked.csv
WHOCLICKED_CARD ?= experiments/exports/whoclicked_dataset_card.md
WHOCLICKED_CSV_PATH_IN_REPO ?= whoclicked.csv
WHOCLICKED_CARD_PATH_IN_REPO ?= README.md
WHOCLICKED_DATASET_MESSAGE ?= Update flattened whoclicked dataset
WHOCLICKED_CARD_MESSAGE ?= Update dataset card for WhoClicked
REPO_URL ?= REPO_URL ?=
BRANCH ?= main BRANCH ?= main
WORKDIR ?= $(HOME)/PHANTOM-agent WORKDIR ?= $(HOME)/PHANTOM-agent
@@ -37,7 +46,8 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
help: help:
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all" @echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all"
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish" @echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
@echo "data.pull data.push | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot" @echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot"
@echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown"
@echo "" @echo ""
@echo "Build general public version:" @echo "Build general public version:"
@echo " make pdf.genpop" @echo " make pdf.genpop"
@@ -57,6 +67,12 @@ help:
@echo "Bootstrap private repo worker from anywhere:" @echo "Bootstrap private repo worker from anywhere:"
@echo " make train.bootstrap REPO_URL=https://github.com/org/repo.git BRANCH=main SWEEP_ID=entity/project/id" @echo " make train.bootstrap REPO_URL=https://github.com/org/repo.git BRANCH=main SWEEP_ID=entity/project/id"
@echo "" @echo ""
@echo "Bootstrap Ray on TPU slice from config:"
@echo " make tpu.ray.bootstrap TPU_CONF=tpu_orchestration/configs/v4_spot_us.conf"
@echo ""
@echo "Publish WhoClicked dataset + card:"
@echo " make data.whoclicked.publish HF_TOKEN=... WHOCLICKED_REPO=velocitatem/whoclickedit"
@echo ""
@echo "Config source: $(SWEEP_ENV_FILE) (auto-loaded)" @echo "Config source: $(SWEEP_ENV_FILE) (auto-loaded)"
$(BUILDDIR): $(BUILDDIR):
@@ -134,6 +150,19 @@ train.agent:
train.bootstrap: train.bootstrap:
@WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap @WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap
# Ray-on-TPU helpers. Each tpu.ray.<step> target runs the Nx target
# research:tpu-ray-<step>, handing TPU_CONF and SWEEP_ENV_FILE to it through
# the environment. A static pattern rule keeps the four targets in lockstep;
# $* is the stem matched by % (bootstrap, deps, verify or teardown).
.PHONY: tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown
tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown: tpu.ray.%:
	@TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-$*
.PHONY: data.pull data.push .PHONY: data.pull data.push
data.pull: data.pull:
python scripts/hf_data.py pull python scripts/hf_data.py pull
@@ -141,6 +170,10 @@ data.pull:
data.push: data.push:
python scripts/hf_data.py push python scripts/hf_data.py push
# Publish the WhoClicked dataset and its card by running the Nx target
# research:whoclicked-publish. Every setting is passed as an environment
# variable on the single (continued) command line; the leading @ keeps the
# command, including HF_TOKEN, from being echoed.
.PHONY: data.whoclicked.publish
data.whoclicked.publish:
	@HF_TOKEN="$(HF_TOKEN)" \
	WHOCLICKED_REPO="$(WHOCLICKED_REPO)" \
	WHOCLICKED_CSV="$(WHOCLICKED_CSV)" \
	WHOCLICKED_CARD="$(WHOCLICKED_CARD)" \
	WHOCLICKED_CSV_PATH_IN_REPO="$(WHOCLICKED_CSV_PATH_IN_REPO)" \
	WHOCLICKED_CARD_PATH_IN_REPO="$(WHOCLICKED_CARD_PATH_IN_REPO)" \
	WHOCLICKED_DATASET_MESSAGE="$(WHOCLICKED_DATASET_MESSAGE)" \
	WHOCLICKED_CARD_MESSAGE="$(WHOCLICKED_CARD_MESSAGE)" \
	$(NX) run research:whoclicked-publish
.PHONY: stats.lines .PHONY: stats.lines
stats.lines: stats.lines:
@$(NX) run research:stats @$(NX) run research:stats

View File

@@ -12,7 +12,7 @@ services:
- GITHUB_TOKEN=${GITHUB_TOKEN} - GITHUB_TOKEN=${GITHUB_TOKEN}
- GOOGLE_APPLICATION_CREDENTIALS=/secrets/gcp-sa.json - GOOGLE_APPLICATION_CREDENTIALS=/secrets/gcp-sa.json
- GCP_ACCOUNT=${GCP_ACCOUNT:-} - GCP_ACCOUNT=${GCP_ACCOUNT:-}
- WATCHDOG_CONFIG_PATTERN=${WATCHDOG_CONFIG_PATTERN:-v6e_*.conf} - WATCHDOG_CONFIG_PATTERN=${WATCHDOG_CONFIG_PATTERN:-v[46]*.conf}
- CLOUDSDK_CONFIG=/.config/gcloud - CLOUDSDK_CONFIG=/.config/gcloud
volumes: volumes:
- ~/.config/gcloud:/.config/gcloud:rw - ~/.config/gcloud:/.config/gcloud:rw

15
nx.json
View File

@@ -58,6 +58,21 @@
"benchmark": { "benchmark": {
"cache": false "cache": false
}, },
"whoclicked-publish": {
"cache": false
},
"tpu-ray-bootstrap": {
"cache": false
},
"tpu-ray-deps": {
"cache": false
},
"tpu-ray-verify": {
"cache": false
},
"tpu-ray-teardown": {
"cache": false
},
"up": { "up": {
"cache": false "cache": false
}, },

View File

@@ -4,6 +4,7 @@ set -euo pipefail
cmd="${1:-}" cmd="${1:-}"
env_file="${SWEEP_ENV_FILE:-.env.sweep}" env_file="${SWEEP_ENV_FILE:-.env.sweep}"
default_tpu_conf="tpu_orchestration/configs/v4_spot_us.conf"
load_sweep_env() { load_sweep_env() {
set -a set -a
@@ -20,6 +21,21 @@ require_var() {
fi fi
} }
# Run tpu_orchestration/bootstrap_ray.sh against a TPU config file.
#   $1 (optional): a single extra flag appended after "--conf <path>".
# The config path is $TPU_CONF when set and non-empty, otherwise
# $default_tpu_conf; it is resolved only after load_sweep_env has run.
# Prints an error to stderr and exits 1 when the config file does not exist.
run_tpu_ray_bootstrap() {
  local extra_flag="${1:-}"
  load_sweep_env
  local tpu_conf_file="${TPU_CONF:-$default_tpu_conf}"

  if [ ! -f "$tpu_conf_file" ]; then
    printf '%s\n' "TPU config not found: $tpu_conf_file" >&2
    exit 1
  fi

  # Build the argument list once; the extra flag is added only when given.
  local -a bootstrap_args=(--conf "$tpu_conf_file")
  if [ -n "$extra_flag" ]; then
    bootstrap_args+=("$extra_flag")
  fi

  bash tpu_orchestration/bootstrap_ray.sh "${bootstrap_args[@]}"
}
case "$cmd" in case "$cmd" in
install) install)
[ -x .venv/bin/python ] || python3 -m venv .venv [ -x .venv/bin/python ] || python3 -m venv .venv
@@ -120,6 +136,32 @@ PY
docker build -f docker/Trainer.dockerfile --target gpu -t "$image_ref:gpu-latest" . docker build -f docker/Trainer.dockerfile --target gpu -t "$image_ref:gpu-latest" .
docker push "$image_ref:gpu-latest" docker push "$image_ref:gpu-latest"
;; ;;
whoclicked-publish)
  # Two uploads, in order: scripts/whoclicked_etl.py (build-upload) for the
  # CSV, then scripts/whoclicked_card.py (build-upload) for the dataset card.
  # HF_TOKEN must be set; every other setting falls back to a default when
  # its environment variable is unset or empty.
  require_var HF_TOKEN "HF_TOKEN required - export HF_TOKEN=<token>"

  # Both steps use the same CSV path and target repo, so resolve them once.
  whoclicked_csv_file="${WHOCLICKED_CSV:-experiments/exports/whoclicked.csv}"
  whoclicked_hf_repo="${WHOCLICKED_REPO:-velocitatem/whoclickedit}"

  .venv/bin/python scripts/whoclicked_etl.py build-upload \
    --output "$whoclicked_csv_file" \
    --repo "$whoclicked_hf_repo" \
    --path-in-repo "${WHOCLICKED_CSV_PATH_IN_REPO:-whoclicked.csv}" \
    --message "${WHOCLICKED_DATASET_MESSAGE:-Update flattened whoclicked dataset}"

  .venv/bin/python scripts/whoclicked_card.py build-upload \
    --csv "$whoclicked_csv_file" \
    --card "${WHOCLICKED_CARD:-experiments/exports/whoclicked_dataset_card.md}" \
    --repo "$whoclicked_hf_repo" \
    --path-in-repo "${WHOCLICKED_CARD_PATH_IN_REPO:-README.md}" \
    --message "${WHOCLICKED_CARD_MESSAGE:-Update dataset card for WhoClicked}"
  ;;
# TPU/Ray dispatch arms: each one calls run_tpu_ray_bootstrap, which forwards
# the optional flag to tpu_orchestration/bootstrap_ray.sh after --conf <path>.
# No mode flag: bootstrap_ray.sh is invoked with --conf only.
tpu-ray-bootstrap)
run_tpu_ray_bootstrap
;;
# Forwards --deps-only.
tpu-ray-deps)
run_tpu_ray_bootstrap --deps-only
;;
# Forwards --verify-only.
tpu-ray-verify)
run_tpu_ray_bootstrap --verify-only
;;
# Forwards --teardown.
tpu-ray-teardown)
run_tpu_ray_bootstrap --teardown
;;
*) *)
printf '%s\n' "Unknown research command: $cmd" >&2 printf '%s\n' "Unknown research command: $cmd" >&2
exit 1 exit 1