diff --git a/tpu_orchestration/configs/test_vm.conf b/tpu_orchestration/configs/test_vm.conf index 6c154ed..12a78df 100644 --- a/tpu_orchestration/configs/test_vm.conf +++ b/tpu_orchestration/configs/test_vm.conf @@ -1,7 +1,7 @@ ZONE="us-central2-b" QR_NAME="v4-test-vm" ACCEL_TYPE="v4-8" -RUNTIME_VERSION="v2-alpha-tpuv4" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="true" RUN_ID="phantom_v4_test_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/configs/v4_od_us.conf b/tpu_orchestration/configs/v4_od_us.conf index 8739861..ba75d7f 100644 --- a/tpu_orchestration/configs/v4_od_us.conf +++ b/tpu_orchestration/configs/v4_od_us.conf @@ -1,7 +1,7 @@ ZONE="us-central2-b" QR_NAME="v4-32-us-ondemand" ACCEL_TYPE="v4-32" -RUNTIME_VERSION="v2-alpha-tpuv4" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="false" RUN_ID="phantom_v4_od_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/configs/v4_spot_us.conf b/tpu_orchestration/configs/v4_spot_us.conf index d0f9755..2e31a18 100644 --- a/tpu_orchestration/configs/v4_spot_us.conf +++ b/tpu_orchestration/configs/v4_spot_us.conf @@ -1,7 +1,7 @@ ZONE="us-central2-b" QR_NAME="v4-32-us-spot" ACCEL_TYPE="v4-32" -RUNTIME_VERSION="v2-alpha-tpuv4" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="true" RUN_ID="phantom_v4_spot_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/configs/v5e_eu.conf b/tpu_orchestration/configs/v5e_eu.conf index c40ee21..89ef604 100644 --- a/tpu_orchestration/configs/v5e_eu.conf +++ b/tpu_orchestration/configs/v5e_eu.conf @@ -1,7 +1,7 @@ ZONE="europe-west4-b" QR_NAME="v5e-64-eu-spot" ACCEL_TYPE="v5litepod-64" -RUNTIME_VERSION="v2-alpha-tpuv5-lite" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="true" RUN_ID="phantom_v5e_eu_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/configs/v5e_us.conf b/tpu_orchestration/configs/v5e_us.conf index 0d44cd5..a77c50e 100644 --- a/tpu_orchestration/configs/v5e_us.conf +++ b/tpu_orchestration/configs/v5e_us.conf @@ -1,7 +1,7 @@ ZONE="us-central1-a" QR_NAME="v5e-64-us-spot" ACCEL_TYPE="v5litepod-64" -RUNTIME_VERSION="v2-alpha-tpuv5-lite" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="true" RUN_ID="phantom_v5e_us_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/configs/v6e_eu.conf b/tpu_orchestration/configs/v6e_eu.conf index 5d29b8c..ae7bcc3 100644 --- a/tpu_orchestration/configs/v6e_eu.conf +++ b/tpu_orchestration/configs/v6e_eu.conf @@ -1,7 +1,7 @@ ZONE="europe-west4-a" QR_NAME="v6e-64-eu-spot" ACCEL_TYPE="v6e-64" -RUNTIME_VERSION="v2-alpha-tpuv6e" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="true" RUN_ID="phantom_v6e_eu_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/configs/v6e_us.conf b/tpu_orchestration/configs/v6e_us.conf index f6cf423..a5fe55d 100644 --- a/tpu_orchestration/configs/v6e_us.conf +++ b/tpu_orchestration/configs/v6e_us.conf @@ -1,7 +1,7 @@ ZONE="us-east1-d" QR_NAME="v6e-64-us-spot" ACCEL_TYPE="v6e-64" -RUNTIME_VERSION="v2-alpha-tpuv6e" +RUNTIME_VERSION="tpu-ubuntu2204-base" IS_SPOT="true" RUN_ID="phantom_v6e_us_1" HF_REPO="velocitatem/capstone" diff --git a/tpu_orchestration/tpu_startup.sh b/tpu_orchestration/tpu_startup.sh index 62b55a9..ae5f556 100644 --- a/tpu_orchestration/tpu_startup.sh +++ b/tpu_orchestration/tpu_startup.sh @@ -62,6 +62,9 @@ cd /app/model if [ -f "requirements.txt" ]; then pip install -r requirements.txt fi +if [ -f "sim/requirements.txt" ]; then + pip install -r sim/requirements.txt +fi # 5. Restore state from Hugging Face Buckets echo "Restoring state from hf://buckets/$HF_REPO..." diff --git a/tpu_orchestration/watchdog.sh b/tpu_orchestration/watchdog.sh index 2b103bc..4c32562 100755 --- a/tpu_orchestration/watchdog.sh +++ b/tpu_orchestration/watchdog.sh @@ -95,7 +95,7 @@ while true; do fi # Determine runtime version - RT_VERSION=${RUNTIME_VERSION:-"v2-alpha-tpuv4"} + RT_VERSION=${RUNTIME_VERSION:-"tpu-ubuntu2204-base"} gcloud compute tpus queued-resources create $QR_NAME \ --project=$PROJECT_ID \