diff --git a/.gitignore b/.gitignore index 7644627..11ff6b1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .env.* !.env.*.example **/.venv +**/.venv-ray # python build/cache artifacts **/__pycache__ diff --git a/Makefile b/Makefile index fb347d2..d471d69 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ PYTEST := $(VENV)/bin/pytest NX := npx nx SWEEP_ENV_FILE ?= .env.sweep +TPU_CONF ?= tpu_orchestration/configs/v4_spot_us.conf WANDB_ENTITY ?= WANDB_PROJECT ?= capstone @@ -21,6 +22,14 @@ SIMPLE_BENCHMARK_ARGS ?= --tiers qtable,ppo,dqn,a2c --alpha-values 0.0,0.15,0.3, BENCHMARK_AGENT_ARGS ?= AGENT_COUNT ?= 0 +WHOCLICKED_REPO ?= velocitatem/whoclickedit +WHOCLICKED_CSV ?= experiments/exports/whoclicked.csv +WHOCLICKED_CARD ?= experiments/exports/whoclicked_dataset_card.md +WHOCLICKED_CSV_PATH_IN_REPO ?= whoclicked.csv +WHOCLICKED_CARD_PATH_IN_REPO ?= README.md +WHOCLICKED_DATASET_MESSAGE ?= Update flattened whoclicked dataset +WHOCLICKED_CARD_MESSAGE ?= Update dataset card for WhoClicked + REPO_URL ?= BRANCH ?= main WORKDIR ?= $(HOME)/PHANTOM-agent @@ -37,7 +46,8 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" || help: @echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all" @echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish" - @echo "data.pull data.push | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot" + @echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot" + @echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown" @echo "" @echo "Build general public version:" @echo " make pdf.genpop" @@ -57,6 +67,12 @@ help: @echo "Bootstrap private repo worker from anywhere:" @echo " make train.bootstrap REPO_URL=https://github.com/org/repo.git BRANCH=main SWEEP_ID=entity/project/id" @echo "" + @echo "Bootstrap Ray on TPU slice from config:" + @echo " make tpu.ray.bootstrap TPU_CONF=tpu_orchestration/configs/v4_spot_us.conf" + @echo "" + @echo "Publish WhoClicked dataset + card:" + @echo " make data.whoclicked.publish HF_TOKEN=... WHOCLICKED_REPO=velocitatem/whoclickedit" + @echo "" @echo "Config source: $(SWEEP_ENV_FILE) (auto-loaded)" $(BUILDDIR): @@ -134,6 +150,19 @@ train.agent: train.bootstrap: @WANDB_ENTITY="$(WANDB_ENTITY)" WANDB_PROJECT="$(WANDB_PROJECT)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" REPO_URL="$(REPO_URL)" BRANCH="$(BRANCH)" WORKDIR="$(WORKDIR)" SWEEP_ID="$(SWEEP_ID)" AGENT_COUNT="$(AGENT_COUNT)" AGENT_LOOP="$(AGENT_LOOP)" RETRY_SECONDS="$(RETRY_SECONDS)" $(NX) run research:train-bootstrap +.PHONY: tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown +tpu.ray.bootstrap: + @TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-bootstrap + +tpu.ray.deps: + @TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-deps + +tpu.ray.verify: + @TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-verify + +tpu.ray.teardown: + @TPU_CONF="$(TPU_CONF)" SWEEP_ENV_FILE="$(SWEEP_ENV_FILE)" $(NX) run research:tpu-ray-teardown + .PHONY: data.pull data.push data.pull: python scripts/hf_data.py pull @@ -141,6 +170,10 @@ data.pull: data.push: python scripts/hf_data.py push +.PHONY: data.whoclicked.publish +data.whoclicked.publish: + @HF_TOKEN="$(HF_TOKEN)" WHOCLICKED_REPO="$(WHOCLICKED_REPO)" WHOCLICKED_CSV="$(WHOCLICKED_CSV)" WHOCLICKED_CARD="$(WHOCLICKED_CARD)" WHOCLICKED_CSV_PATH_IN_REPO="$(WHOCLICKED_CSV_PATH_IN_REPO)" WHOCLICKED_CARD_PATH_IN_REPO="$(WHOCLICKED_CARD_PATH_IN_REPO)" WHOCLICKED_DATASET_MESSAGE="$(WHOCLICKED_DATASET_MESSAGE)" WHOCLICKED_CARD_MESSAGE="$(WHOCLICKED_CARD_MESSAGE)" $(NX) run research:whoclicked-publish + .PHONY: stats.lines stats.lines: @$(NX) run research:stats diff --git a/docker-compose.yml b/docker-compose.yml index 24961c5..acbc37c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: - GITHUB_TOKEN=${GITHUB_TOKEN} - GOOGLE_APPLICATION_CREDENTIALS=/secrets/gcp-sa.json - GCP_ACCOUNT=${GCP_ACCOUNT:-} - - WATCHDOG_CONFIG_PATTERN=${WATCHDOG_CONFIG_PATTERN:-v6e_*.conf} + - WATCHDOG_CONFIG_PATTERN=${WATCHDOG_CONFIG_PATTERN:-v[46]*.conf} - CLOUDSDK_CONFIG=/.config/gcloud volumes: - ~/.config/gcloud:/.config/gcloud:rw diff --git a/nx.json b/nx.json index d286a8f..a87654a 100644 --- a/nx.json +++ b/nx.json @@ -58,6 +58,21 @@ "benchmark": { "cache": false }, + "whoclicked-publish": { + "cache": false + }, + "tpu-ray-bootstrap": { + "cache": false + }, + "tpu-ray-deps": { + "cache": false + }, + "tpu-ray-verify": { + "cache": false + }, + "tpu-ray-teardown": { + "cache": false + }, "up": { "cache": false }, diff --git a/scripts/nx_research.sh b/scripts/nx_research.sh index 434a312..f74b8c2 100644 --- a/scripts/nx_research.sh +++ b/scripts/nx_research.sh @@ -4,6 +4,7 @@ set -euo pipefail cmd="${1:-}" env_file="${SWEEP_ENV_FILE:-.env.sweep}" +default_tpu_conf="tpu_orchestration/configs/v4_spot_us.conf" load_sweep_env() { set -a @@ -20,6 +21,21 @@ require_var() { fi } +run_tpu_ray_bootstrap() { + local mode_flag="${1:-}" + load_sweep_env + local conf_path="${TPU_CONF:-$default_tpu_conf}" + [ -f "$conf_path" ] || { + printf '%s\n' "TPU config not found: $conf_path" >&2 + exit 1 + } + if [ -n "$mode_flag" ]; then + bash tpu_orchestration/bootstrap_ray.sh --conf "$conf_path" "$mode_flag" + else + bash tpu_orchestration/bootstrap_ray.sh --conf "$conf_path" + fi +} + case "$cmd" in install) [ -x .venv/bin/python ] || python3 -m venv .venv @@ -120,6 +136,32 @@ PY docker build -f docker/Trainer.dockerfile --target gpu -t "$image_ref:gpu-latest" . docker push "$image_ref:gpu-latest" ;; + whoclicked-publish) + require_var HF_TOKEN "HF_TOKEN required - export HF_TOKEN=" + .venv/bin/python scripts/whoclicked_etl.py build-upload \ + --output "${WHOCLICKED_CSV:-experiments/exports/whoclicked.csv}" \ + --repo "${WHOCLICKED_REPO:-velocitatem/whoclickedit}" \ + --path-in-repo "${WHOCLICKED_CSV_PATH_IN_REPO:-whoclicked.csv}" \ + --message "${WHOCLICKED_DATASET_MESSAGE:-Update flattened whoclicked dataset}" + .venv/bin/python scripts/whoclicked_card.py build-upload \ + --csv "${WHOCLICKED_CSV:-experiments/exports/whoclicked.csv}" \ + --card "${WHOCLICKED_CARD:-experiments/exports/whoclicked_dataset_card.md}" \ + --repo "${WHOCLICKED_REPO:-velocitatem/whoclickedit}" \ + --path-in-repo "${WHOCLICKED_CARD_PATH_IN_REPO:-README.md}" \ + --message "${WHOCLICKED_CARD_MESSAGE:-Update dataset card for WhoClicked}" + ;; + tpu-ray-bootstrap) + run_tpu_ray_bootstrap + ;; + tpu-ray-deps) + run_tpu_ray_bootstrap --deps-only + ;; + tpu-ray-verify) + run_tpu_ray_bootstrap --verify-only + ;; + tpu-ray-teardown) + run_tpu_ray_bootstrap --teardown + ;; *) printf '%s\n' "Unknown research command: $cmd" >&2 exit 1