mirror of
https://github.com/velocitatem/cvfs.git
synced 2026-05-31 16:53:38 +00:00
Initial commit
This commit is contained in:
0
ml/__init__.py
Normal file
0
ml/__init__.py
Normal file
6
ml/configs/data/default.yaml
Normal file
6
ml/configs/data/default.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
dataset_name: synthetic_classification
|
||||
output_dir: ml/data/processed
|
||||
train_samples: 2048
|
||||
input_dim: 16
|
||||
num_classes: 3
|
||||
seed: 42
|
||||
10
ml/configs/train/default.yaml
Normal file
10
ml/configs/train/default.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
input_dim: 16
|
||||
hidden_dim: 64
|
||||
num_classes: 3
|
||||
learning_rate: 0.001
|
||||
batch_size: 64
|
||||
epochs: 5
|
||||
log_every_n_steps: 20
|
||||
seed: 42
|
||||
tensorboard_dir: ml/tensorboard
|
||||
weights_output: ml/models/weights/model.pt
|
||||
5
ml/data/README.md
Normal file
5
ml/data/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Data
|
||||
|
||||
Some thoughts on processing data: In a lot of cases you will get data that is not in an S3 bucket or anything glamorous, and to do any modelling you need the data locally. But when you have, say, a 1TB dataset, you may want only 10GB locally, and then, once you upload to a GPU-rich server, you will want all of the data there. How can you manage this data well? What are the best practices?
|
||||
|
||||
Huggingface lets you upload up to 300 gigs of data into a dataset.
|
||||
52
ml/data/etl.py
Normal file
52
ml/data/etl.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
|
||||
def build_dataset(
    train_samples: int, input_dim: int, num_classes: int, seed: int
) -> dict[str, torch.Tensor]:
    """Generate a reproducible synthetic classification dataset.

    Args:
        train_samples: Number of rows to generate.
        input_dim: Number of feature columns per row.
        num_classes: Exclusive upper bound for the integer labels.
        seed: Seed for the private generator used for all sampling.

    Returns:
        A dict with ``"features"`` (standard-normal samples of shape
        ``(train_samples, input_dim)``) and ``"labels"`` (integers drawn
        from ``[0, num_classes)`` with shape ``(train_samples,)``).
    """
    rng = torch.Generator()
    rng.manual_seed(seed)

    # Features are drawn before labels from the same generator, so this
    # order fixes the exact values produced for a given seed.
    feature_shape = (train_samples, input_dim)
    features = torch.randn(feature_shape, generator=rng)
    labels = torch.randint(
        low=0, high=num_classes, size=(train_samples,), generator=rng
    )
    return dict(features=features, labels=labels)
|
||||
|
||||
|
||||
def main() -> None:
    """Build the synthetic dataset described by a YAML config and save it.

    Reads ``dataset_name``, ``train_samples``, ``input_dim``,
    ``num_classes`` and ``seed`` from the config, then writes
    ``dataset.pt`` and a ``metadata.json`` summary into the output
    directory.

    The output directory is resolved in this order: the ``--output``
    argument, the config's ``output_dir`` key, then ``ml/data/processed``.
    """
    parser = argparse.ArgumentParser(description="Build a synthetic training dataset")
    parser.add_argument("--config", default="ml/configs/data/default.yaml")
    # No hard-coded default: a non-empty default would always shadow the
    # ``output_dir`` value from the config file.
    parser.add_argument(
        "--output",
        default=None,
        help="Output directory (defaults to the config's output_dir)",
    )
    args = parser.parse_args()

    with open(args.config, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    output_dir = Path(args.output or cfg.get("output_dir") or "ml/data/processed")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Convert each numeric setting once so the dataset and its metadata
    # are guaranteed to describe the same values.
    params = {
        key: int(cfg[key])
        for key in ("train_samples", "input_dim", "num_classes", "seed")
    }
    dataset = build_dataset(**params)
    dataset_path = output_dir / "dataset.pt"
    torch.save(dataset, dataset_path)

    metadata = {
        "dataset_name": cfg["dataset_name"],
        **params,
        "dataset_path": str(dataset_path),
    }
    with open(output_dir / "metadata.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)


if __name__ == "__main__":
    main()
|
||||
BIN
ml/data/processed/dataset.pt
Normal file
BIN
ml/data/processed/dataset.pt
Normal file
Binary file not shown.
8
ml/data/processed/metadata.json
Normal file
8
ml/data/processed/metadata.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"dataset_name": "synthetic_classification",
|
||||
"train_samples": 2048,
|
||||
"input_dim": 16,
|
||||
"num_classes": 3,
|
||||
"seed": 42,
|
||||
"dataset_path": "ml/data/processed/dataset.pt"
|
||||
}
|
||||
36
ml/inference.py
Normal file
36
ml/inference.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import os
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
|
||||
# TODO: Import model when ready
|
||||
from models import * # TODO: SPECIFY
|
||||
|
||||
# Request payload for POST /predict. No fields are declared yet; a plain
# comment is used here because a class docstring would be published as the
# schema description.
class InputData(BaseModel):
    ...
|
||||
|
||||
|
||||
# Fail fast at import time when the weights location is not configured.
weights_path = os.environ.get("ML_LATEST_WEIGHTS_PATH")
if weights_path is None:
    raise RuntimeError("ML_LATEST_WEIGHTS_PATH not set")


# FastAPI application object on which the route handlers are registered.
app = FastAPI(version="1.0.0", title="ML Inference API")
|
||||
|
||||
# Liveness probe: returns a fixed payload and touches no model state.
@app.get("/health")
def health_check():
    payload = dict(status="healthy", service="ml-inference")
    return payload
|
||||
|
||||
# Placeholder prediction route: the request body is currently ignored and
# the response always carries 0 until a model is wired in.
@app.post("/predict")
def predict(data: InputData):
    # TODO: x = torch.tensor([data.features], dtype=torch.float32)
    with torch.no_grad():
        # TODO: y = model(x)
        placeholder = torch.tensor(0)
    prediction = placeholder.tolist()
    return {"prediction": prediction}
|
||||
15
ml/models/arch.py
Normal file
15
ml/models/arch.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class Model(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        hidden_dim: Width of the hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_dim: int, hidden_dim: int, num_classes: int) -> None:
        super().__init__()
        # Layers are built in forward order and stored in a Sequential
        # called ``net`` so parameter names stay ``net.0.*`` / ``net.2.*``.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the final linear layer for ``x``."""
        scores = self.net(x)
        return scores
|
||||
98
ml/models/train.py
Normal file
98
ml/models/train.py
Normal file
@@ -0,0 +1,98 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import yaml
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from alveslib import get_logger
|
||||
from ml.models.arch import Model
|
||||
|
||||
logger = get_logger("ml-train")
|
||||
|
||||
|
||||
class Trainer:
    """Minimal supervised training loop with TensorBoard scalar logging.

    Optimizes ``model`` with Adam and cross-entropy loss over the batches
    of ``train_loader``, writing per-step and per-epoch loss scalars to a
    ``SummaryWriter`` rooted at ``log_dir``.

    Args:
        model: Network to optimize; called as ``model(features)``.
        train_loader: Iterable of ``(features, target)`` batches.
        learning_rate: Learning rate passed to Adam.
        log_dir: Directory handed to ``SummaryWriter``.
        log_every_n_steps: A step loss is logged whenever the batch index
            within the current epoch is a multiple of this value.

    Raises:
        ValueError: If ``log_every_n_steps`` is smaller than 1.
    """

    def __init__(
        self,
        model: nn.Module,
        train_loader: DataLoader,
        learning_rate: float,
        log_dir: str,
        log_every_n_steps: int,
    ) -> None:
        # Validate before opening the writer: a zero interval would
        # otherwise only surface as ZeroDivisionError inside the first epoch.
        if log_every_n_steps < 1:
            raise ValueError(
                f"log_every_n_steps must be >= 1, got {log_every_n_steps}"
            )
        self.model = model
        self.train_loader = train_loader
        self.optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.writer = SummaryWriter(log_dir)
        self.step = 0  # global optimizer-step counter across epochs
        self.log_every_n_steps = log_every_n_steps

    def train_epoch(self) -> float:
        """Run one pass over ``train_loader`` and return the mean batch loss.

        Returns:
            The sum of per-batch losses divided by the number of batches
            (the divisor is clamped to 1 so an empty loader yields 0.0).
        """
        self.model.train()
        total_loss = 0.0
        for batch_idx, (features, target) in enumerate(self.train_loader):
            self.optimizer.zero_grad()
            output = self.model(features)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()

            # Read the scalar once and reuse it for the total and the log.
            loss_value = loss.item()
            total_loss += loss_value
            if batch_idx % self.log_every_n_steps == 0:
                self.writer.add_scalar("Loss/TrainStep", loss_value, self.step)
            self.step += 1

        return total_loss / max(len(self.train_loader), 1)

    def train(self, epochs: int) -> None:
        """Train for ``epochs`` epochs, logging the mean loss of each one.

        The writer is closed when training ends, including when an epoch
        raises, so buffered events are flushed and the file handle is
        released either way.
        """
        try:
            for epoch in range(epochs):
                avg_loss = self.train_epoch()
                self.writer.add_scalar("Loss/TrainEpoch", avg_loss, epoch)
                logger.info(f"epoch={epoch + 1}/{epochs} avg_loss={avg_loss:.5f}")
        finally:
            self.writer.close()
|
||||
|
||||
|
||||
def main() -> None:
    """Train the baseline model from a YAML config and save its weights.

    Loads the dataset blob (a dict with ``"features"`` and ``"labels"``
    tensors), trains a ``Model`` with ``Trainer`` and writes the resulting
    ``state_dict`` to disk. The weights path is taken from ``--weights``,
    else from the config's ``weights_output`` key, else
    ``ml/models/weights/model.pt``.

    Raises:
        ValueError: If the config's ``input_dim`` does not match the last
            dimension of the dataset features.
    """
    parser = argparse.ArgumentParser(description="Train a baseline model")
    parser.add_argument("--config", default="ml/configs/train/default.yaml")
    parser.add_argument("--dataset", default="ml/data/processed/dataset.pt")
    parser.add_argument("--weights", default=None)
    args = parser.parse_args()

    with open(args.config, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    torch.manual_seed(int(cfg["seed"]))

    # weights_only=True keeps the unpickler from constructing arbitrary
    # objects; the dataset file only holds a dict of tensors.
    dataset_blob = torch.load(args.dataset, map_location="cpu", weights_only=True)
    features = dataset_blob["features"]
    labels = dataset_blob["labels"]

    # The data and train configs each declare input_dim; catch a mismatch
    # here rather than as a shape error inside the first forward pass.
    input_dim = int(cfg["input_dim"])
    if features.shape[-1] != input_dim:
        raise ValueError(
            f"config input_dim={input_dim} does not match dataset feature "
            f"width {features.shape[-1]} ({args.dataset})"
        )

    dataset = TensorDataset(features, labels)
    train_loader = DataLoader(dataset, batch_size=int(cfg["batch_size"]), shuffle=True)

    model = Model(
        input_dim=input_dim,
        hidden_dim=int(cfg["hidden_dim"]),
        num_classes=int(cfg["num_classes"]),
    )
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        learning_rate=float(cfg["learning_rate"]),
        log_dir=str(cfg["tensorboard_dir"]),
        log_every_n_steps=int(cfg["log_every_n_steps"]),
    )
    trainer.train(epochs=int(cfg["epochs"]))

    weights_target = args.weights or cfg.get(
        "weights_output", "ml/models/weights/model.pt"
    )
    weights_path = Path(weights_target)
    weights_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), weights_path)
    logger.info(f"saved_weights={weights_path}")


if __name__ == "__main__":
    main()
|
||||
70
ml/project.json
Normal file
70
ml/project.json
Normal file
@@ -0,0 +1,70 @@
|
||||
{
|
||||
"name": "ml",
|
||||
"root": "ml",
|
||||
"sourceRoot": "ml",
|
||||
"projectType": "application",
|
||||
"implicitDependencies": ["alveslib"],
|
||||
"targets": {
|
||||
"dev": {
|
||||
"executor": "nx:run-commands",
|
||||
"options": {
|
||||
"cwd": "ml",
|
||||
"command": "uvicorn inference:app --host 0.0.0.0 --port 8000 --reload"
|
||||
}
|
||||
},
|
||||
"build": {
|
||||
"executor": "nx:run-commands",
|
||||
"options": {
|
||||
"command": "uv run python -m compileall ml"
|
||||
}
|
||||
},
|
||||
"etl": {
|
||||
"executor": "nx:run-commands",
|
||||
"cache": true,
|
||||
"inputs": [
|
||||
"default",
|
||||
"{workspaceRoot}/ml/configs/data/**/*.yaml"
|
||||
],
|
||||
"outputs": ["{workspaceRoot}/ml/data/processed"],
|
||||
"options": {
|
||||
"command": "uv run python -m ml.data.etl --config ml/configs/data/default.yaml --output ml/data/processed"
|
||||
}
|
||||
},
|
||||
"train": {
|
||||
"executor": "nx:run-commands",
|
||||
"cache": true,
|
||||
"dependsOn": ["etl"],
|
||||
"inputs": [
|
||||
"default",
|
||||
"{workspaceRoot}/ml/configs/train/**/*.yaml",
|
||||
"{workspaceRoot}/ml/configs/data/**/*.yaml",
|
||||
"{workspaceRoot}/ml/data/processed/**"
|
||||
],
|
||||
"outputs": [
|
||||
"{workspaceRoot}/ml/models/weights",
|
||||
"{workspaceRoot}/ml/tensorboard"
|
||||
],
|
||||
"options": {
|
||||
"command": "uv run python -m ml.models.train --config ml/configs/train/default.yaml --dataset ml/data/processed/dataset.pt --weights ml/models/weights/model.pt"
|
||||
}
|
||||
},
|
||||
"lint": {
|
||||
"executor": "nx:run-commands",
|
||||
"options": {
|
||||
"command": "uv run ruff check ml"
|
||||
}
|
||||
},
|
||||
"typecheck": {
|
||||
"executor": "nx:run-commands",
|
||||
"options": {
|
||||
"command": "uv run mypy ml"
|
||||
}
|
||||
},
|
||||
"test": {
|
||||
"executor": "nx:run-commands",
|
||||
"options": {
|
||||
"command": "uv run pytest ml -v"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user