chore: bulk tpu reorchestration

This commit is contained in:
2026-03-15 21:14:41 +01:00
parent 52b4dcdce3
commit a9c091050c
10 changed files with 155 additions and 42 deletions

View File

@@ -3,6 +3,7 @@ QR_NAME="v4-32-us-ondemand"
 ACCEL_TYPE="v4-32"
 RUNTIME_VERSION="tpu-ubuntu2204-base"
 IS_SPOT="false"
+INTERNAL_IPS="false"
 RUN_ID="phantom_v4_od_1"
 HF_REPO="velocitatem/capstone"
-TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"
+TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"

View File

@@ -3,6 +3,7 @@ QR_NAME="v4-32-us-spot"
 ACCEL_TYPE="v4-32"
 RUNTIME_VERSION="tpu-ubuntu2204-base"
 IS_SPOT="true"
+INTERNAL_IPS="false"
 RUN_ID="phantom_v4_spot_1"
 HF_REPO="velocitatem/capstone"
-TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"
+TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"

View File

@@ -1,8 +1,8 @@
 ZONE="europe-west4-b"
-QR_NAME="v5e-64-eu-spot"
-ACCEL_TYPE="v5litepod-64"
+QR_NAME="v5e-32-eu-spot"
+ACCEL_TYPE="v5litepod-32"
 RUNTIME_VERSION="tpu-ubuntu2204-base"
 IS_SPOT="true"
 RUN_ID="phantom_v5e_eu_1"
 HF_REPO="velocitatem/capstone"
-TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"
+TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"

View File

@@ -1,8 +1,8 @@
 ZONE="us-central1-a"
-QR_NAME="v5e-64-us-spot"
-ACCEL_TYPE="v5litepod-64"
+QR_NAME="v5e-32-us-spot"
+ACCEL_TYPE="v5litepod-32"
 RUNTIME_VERSION="tpu-ubuntu2204-base"
 IS_SPOT="true"
 RUN_ID="phantom_v5e_us_1"
 HF_REPO="velocitatem/capstone"
-TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"
+TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"

View File

@@ -1,8 +1,8 @@
 ZONE="europe-west4-a"
-QR_NAME="v6e-64-eu-spot"
-ACCEL_TYPE="v6e-64"
+QR_NAME="v6e-32-eu-spot"
+ACCEL_TYPE="v6e-32"
 RUNTIME_VERSION="tpu-ubuntu2204-base"
 IS_SPOT="true"
 RUN_ID="phantom_v6e_eu_1"
 HF_REPO="velocitatem/capstone"
-TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"
+TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"

View File

@@ -1,8 +1,8 @@
 ZONE="us-east1-d"
-QR_NAME="v6e-64-us-spot"
-ACCEL_TYPE="v6e-64"
+QR_NAME="v6e-32-us-spot"
+ACCEL_TYPE="v6e-32"
 RUNTIME_VERSION="tpu-ubuntu2204-base"
 IS_SPOT="true"
 RUN_ID="phantom_v6e_us_1"
 HF_REPO="velocitatem/capstone"
-TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"
+TRAIN_CMD="python -m engine.train --sweep-agent --sweep-id lusiana/capstone/oasdorof"