From f5621f120f9d45eb910b99e88ee6df739b290bc1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 9 Apr 2026 09:27:26 +0000 Subject: [PATCH] Add paperless-ngx integration for document storage and share links - dlib/integrations/paperless.py: sync HTTP client wrapping the paperless-ngx REST API (upload doc, poll task, create/delete share links, delete document) - config: PAPERLESS_ENABLED, PAPERLESS_BASE_URL, PAPERLESS_TOKEN, PAPERLESS_TAG_IDS - PublicAsset model: paperless_document_id + paperless_share_slug columns - publication service: after creating the asset, if paperless is enabled upload the patched PDF and create a share link; stores doc id + share slug on the asset - public routes: pass expires_at through to publish_version; new POST /{slug}/share-links endpoint to (re)create expiring share links on demand - schemas: PublishRequest.expires_at, PublicAssetResponse.paperless_share_url, new ShareLinkRequest model - frontend: paperless_share_url field on PublicAsset type, createShareLink() and expiresAt param on publishVersion() in api.ts - .env.example: documented paperless env vars https://claude.ai/code/session_01YPVs6uBwCvcwVMvrfLBBdu --- .env.example | 11 +++ apps/backend/fastapi/app/api/routes/public.py | 64 +++++++++++--- apps/backend/fastapi/app/core/config.py | 16 +++- apps/backend/fastapi/app/models/cv.py | 4 +- apps/backend/fastapi/app/schemas/__init__.py | 2 + apps/backend/fastapi/app/schemas/cv.py | 6 ++ .../fastapi/app/services/publication.py | 24 ++++- apps/webapp/src/libs/api.ts | 15 +++- apps/webapp/src/types/cv.ts | 2 + dlib/integrations/__init__.py | 0 dlib/integrations/paperless.py | 88 +++++++++++++++++++ 11 files changed, 214 insertions(+), 18 deletions(-) create mode 100644 dlib/integrations/__init__.py create mode 100644 dlib/integrations/paperless.py diff --git a/.env.example b/.env.example index 772b570..12e90d6 100644 --- a/.env.example +++ b/.env.example @@ -57,6 +57,17 @@ AUTHENTIK_CLIENT_SECRET= # Leave blank to use the built-in rule-based tailoring instead of Claude. ANTHROPIC_API_KEY= +# ── Paperless-ngx integration (optional) ───────────────────────────────────── +# When enabled, published CVs are uploaded to your paperless-ngx instance and +# shared via paperless share links (with optional expiry). MinIO is still used +# for DOCX artifact storage; paperless handles the published PDF + sharing. +PAPERLESS_ENABLED=false +PAPERLESS_BASE_URL=http://localhost:8000 +# API token — obtain via POST /api/token/ with your paperless credentials. +PAPERLESS_TOKEN= +# Comma-separated tag IDs to apply to uploaded CV documents (optional). +PAPERLESS_TAG_IDS= + # ── Demo mode ───────────────────────────────────────────────────────────────── # Set to true to enable standalone demo mode in the webapp. # Demo mode uses hardcoded dummy data — no backend or DB required. diff --git a/apps/backend/fastapi/app/api/routes/public.py b/apps/backend/fastapi/app/api/routes/public.py index 61876af..3f7db16 100644 --- a/apps/backend/fastapi/app/api/routes/public.py +++ b/apps/backend/fastapi/app/api/routes/public.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio import hashlib from datetime import datetime, timezone @@ -16,11 +17,13 @@ from app.schemas import ( PublicAssetLookupResponse, PublicAssetResponse, PublishRequest, + ShareLinkRequest, ) from app.services.publication import publish_version from app.services.storage import storage_client from dlib.auth import AuthenticatedUser from dlib.cv import docx_bytes_to_pdf, generate_patched_docx +from dlib.integrations.paperless import get_paperless_client router = APIRouter(prefix="/public", tags=["public"]) @@ -48,6 +51,18 @@ async def _get_public_asset(session: AsyncSession, slug: str) -> PublicAsset: return asset +async def _assert_owner(session: AsyncSession, asset: PublicAsset, owner_id: str) -> None: + if not asset.version_id: + raise HTTPException(status_code=403, detail="Not authorized") + stmt = ( + select(CvVersion) + .join(CvVersion.document) + .where(CvVersion.id == asset.version_id, CvDocument.owner_id == owner_id) + ) + if not (await session.execute(stmt)).scalars().one_or_none(): + raise HTTPException(status_code=403, detail="Not authorized") + + @router.post("/publish", response_model=PublicAssetResponse) async def publish( payload: PublishRequest, @@ -60,12 +75,40 @@ async def publish( version_id=payload.version_id, submission_id=payload.submission_id, slug=payload.slug, + expires_at=payload.expires_at, ) if not asset: raise HTTPException(status_code=404, detail="Version or submission not found") return _response_from_asset(asset) +@router.post("/{slug}/share-links", response_model=PublicAssetResponse) +async def create_share_link( + slug: str, + payload: ShareLinkRequest, + session: AsyncSession = Depends(get_db), + user: AuthenticatedUser = Depends(get_current_user), +): + asset = await _get_public_asset(session, slug) + await _assert_owner(session, asset, user.sub) + + if not asset.paperless_document_id: + raise HTTPException(status_code=409, detail="Asset not synced to paperless") + + settings = get_settings() + client = get_paperless_client(settings) + if not client: + raise HTTPException(status_code=503, detail="Paperless integration not enabled") + + _, share_url = await asyncio.to_thread( + client.create_share_link, asset.paperless_document_id, payload.expiration_date + ) + asset.paperless_share_slug = share_url.split("/share/")[-1] + await session.commit() + await session.refresh(asset) + return _response_from_asset(asset) + + @router.get("/{slug}/analytics", response_model=PublicAssetAnalyticsResponse) async def get_analytics( slug: str, @@ -73,17 +116,7 @@ async def get_analytics( user: AuthenticatedUser = Depends(get_current_user), ): asset = await _get_public_asset(session, slug) - - if asset.version_id: - stmt = ( - select(CvVersion) - .join(CvVersion.document) - .where(CvVersion.id == asset.version_id, CvDocument.owner_id == user.sub) - ) - if not (await session.execute(stmt)).scalars().one_or_none(): - raise HTTPException(status_code=403, detail="Not authorized") - else: - raise HTTPException(status_code=403, detail="Not authorized") + await _assert_owner(session, asset, user.sub) view_count = ( await session.execute( @@ -137,7 +170,11 @@ async def get_public_asset(slug: str, request: Request, session: AsyncSession = def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse: settings = get_settings() base = settings.public_base_url.rstrip("/") - url = f"{base}/cv/{asset.slug}" + paperless_share_url = ( + f"{settings.paperless_base_url}/share/{asset.paperless_share_slug}" + if settings.paperless_base_url and asset.paperless_share_slug + else None + ) return PublicAssetResponse( id=asset.id, slug=asset.slug, @@ -146,5 +183,6 @@ def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse: created_at=asset.created_at, version_id=asset.version_id, submission_id=asset.submission_id, - url=url, + url=f"{base}/cv/{asset.slug}", + paperless_share_url=paperless_share_url, ) diff --git a/apps/backend/fastapi/app/core/config.py b/apps/backend/fastapi/app/core/config.py index 41777ce..682a8e3 100644 --- a/apps/backend/fastapi/app/core/config.py +++ b/apps/backend/fastapi/app/core/config.py @@ -47,6 +47,11 @@ class Settings(BaseSettings): ) publish_domain: str = Field(default="cv.alves.world", alias="CV_PUBLIC_DOMAIN") + paperless_enabled: bool = Field(default=False, alias="PAPERLESS_ENABLED") + paperless_base_url: str | None = Field(default=None, alias="PAPERLESS_BASE_URL") + paperless_token: str | None = Field(default=None, alias="PAPERLESS_TOKEN") + paperless_tag_ids: list[int] = Field(default_factory=list, alias="PAPERLESS_TAG_IDS") + class Config: env_file = ".env" extra = "ignore" @@ -67,13 +72,20 @@ class Settings(BaseSettings): return [origin.strip() for origin in value.split(",") if origin.strip()] return value - @field_validator("storage_endpoint_url", mode="before") + @field_validator("storage_endpoint_url", "paperless_base_url", "paperless_token", mode="before") @classmethod - def _empty_endpoint_to_none(cls, value): + def _empty_str_to_none(cls, value): if isinstance(value, str) and not value.strip(): return None return value + @field_validator("paperless_tag_ids", mode="before") + @classmethod + def _parse_tag_ids(cls, value): + if isinstance(value, str): + return [int(v.strip()) for v in value.split(",") if v.strip()] + return value + @lru_cache(maxsize=1) def get_settings() -> Settings: diff --git a/apps/backend/fastapi/app/models/cv.py b/apps/backend/fastapi/app/models/cv.py index 5475c25..a5d9a11 100644 --- a/apps/backend/fastapi/app/models/cv.py +++ b/apps/backend/fastapi/app/models/cv.py @@ -3,7 +3,7 @@ from __future__ import annotations import enum from datetime import datetime, timezone -from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, String, Text +from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, Integer, String, Text from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column, relationship @@ -148,6 +148,8 @@ class PublicAsset(Base, IdentifierMixin, TimestampMixin): expires_at: Mapped[str | None] = mapped_column( DateTime(timezone=True), nullable=True ) + paperless_document_id: Mapped[int | None] = mapped_column(Integer, nullable=True) + paperless_share_slug: Mapped[str | None] = mapped_column(String(160), nullable=True) submission: Mapped[Submission | None] = relationship( "Submission", back_populates="public_asset" diff --git a/apps/backend/fastapi/app/schemas/__init__.py b/apps/backend/fastapi/app/schemas/__init__.py index b0a6f39..59a78f5 100644 --- a/apps/backend/fastapi/app/schemas/__init__.py +++ b/apps/backend/fastapi/app/schemas/__init__.py @@ -9,6 +9,7 @@ from .cv import ( PublicAssetLookupResponse, PublicAssetResponse, PublishRequest, + ShareLinkRequest, SubmissionCreateRequest, SubmissionResponse, SubmissionStatusUpdateRequest, @@ -31,6 +32,7 @@ __all__ = [ "SuggestionResponse", "SuggestionUpdateRequest", "PublishRequest", + "ShareLinkRequest", "PublicAssetResponse", "PublicAssetLookupResponse", "PublicAssetAnalyticsResponse", diff --git a/apps/backend/fastapi/app/schemas/cv.py b/apps/backend/fastapi/app/schemas/cv.py index 1dc2abe..cd648d8 100644 --- a/apps/backend/fastapi/app/schemas/cv.py +++ b/apps/backend/fastapi/app/schemas/cv.py @@ -121,6 +121,11 @@ class PublishRequest(BaseModel): version_id: str | None = None submission_id: str | None = None slug: str | None = None + expires_at: datetime | None = None + + +class ShareLinkRequest(BaseModel): + expiration_date: datetime | None = None class PublicAssetResponse(BaseModel): @@ -134,6 +139,7 @@ class PublicAssetResponse(BaseModel): version_id: str | None = None submission_id: str | None = None url: str | None = None + paperless_share_url: str | None = None class PublicAssetLookupResponse(BaseModel): diff --git a/apps/backend/fastapi/app/services/publication.py b/apps/backend/fastapi/app/services/publication.py index c90e2e9..75645f1 100644 --- a/apps/backend/fastapi/app/services/publication.py +++ b/apps/backend/fastapi/app/services/publication.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio import re from datetime import datetime from uuid import uuid4 @@ -7,7 +8,11 @@ from uuid import uuid4 from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from app.core.config import get_settings from app.models import CvDocument, CvVersion, PublicAsset, Submission +from app.services.storage import storage_client +from dlib.cv import docx_bytes_to_pdf, generate_patched_docx +from dlib.integrations.paperless import get_paperless_client async def publish_version( @@ -17,6 +22,7 @@ async def publish_version( version_id: str | None, submission_id: str | None, slug: str | None, + expires_at: datetime | None = None, ) -> PublicAsset | None: target_version: CvVersion | None = None target_submission: Submission | None = None @@ -55,11 +61,27 @@ async def publish_version( slug=resolved_slug, artifact_key=target_version.artifact_docx_key, is_public=True, - expires_at=None, + expires_at=expires_at, ) session.add(asset) await session.commit() await session.refresh(asset) + + settings = get_settings() + client = get_paperless_client(settings) + if client: + docx = storage_client.download_bytes(target_version.artifact_docx_key) + blocks = target_version.structured_blocks or [] + pdf = docx_bytes_to_pdf(generate_patched_docx(docx, blocks)) + doc_id = await asyncio.to_thread( + client.upload_document, pdf, resolved_slug, settings.paperless_tag_ids or [] + ) + _, share_url = await asyncio.to_thread(client.create_share_link, doc_id, expires_at) + asset.paperless_document_id = doc_id + asset.paperless_share_slug = share_url.split("/share/")[-1] + await session.commit() + await session.refresh(asset) + return asset diff --git a/apps/webapp/src/libs/api.ts b/apps/webapp/src/libs/api.ts index 6260fe8..0dea9a4 100644 --- a/apps/webapp/src/libs/api.ts +++ b/apps/webapp/src/libs/api.ts @@ -80,6 +80,7 @@ export type PublicAsset = { version_id?: string | null; submission_id?: string | null; created_at: string; + paperless_share_url?: string | null; }; export type PublicAssetAnalytics = { @@ -229,11 +230,23 @@ export async function publishVersion( versionId?: string | null, submissionId?: string | null, slug?: string | null, + expiresAt?: string | null, ): Promise { return req('/api/v1/public/publish', { method: 'POST', headers: { 'content-type': 'application/json' }, - body: JSON.stringify({ version_id: versionId ?? null, submission_id: submissionId ?? null, slug: slug ?? null }), + body: JSON.stringify({ version_id: versionId ?? null, submission_id: submissionId ?? null, slug: slug ?? null, expires_at: expiresAt ?? null }), + }); +} + +export async function createShareLink( + slug: string, + expirationDate?: string | null, +): Promise { + return req(`/api/v1/public/${encodeURIComponent(slug)}/share-links`, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ expiration_date: expirationDate ?? null }), }); } diff --git a/apps/webapp/src/types/cv.ts b/apps/webapp/src/types/cv.ts index 0a9282e..7be0d53 100644 --- a/apps/webapp/src/types/cv.ts +++ b/apps/webapp/src/types/cv.ts @@ -64,6 +64,8 @@ export interface PublicAsset { isPublic: boolean; expiresAt?: string; viewCount: number; + url?: string | null; + paperlessShareUrl?: string | null; } export interface AISuggestion { diff --git a/dlib/integrations/__init__.py b/dlib/integrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dlib/integrations/paperless.py b/dlib/integrations/paperless.py new file mode 100644 index 0000000..8e03834 --- /dev/null +++ b/dlib/integrations/paperless.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import time +from datetime import datetime +from typing import TYPE_CHECKING + +import httpx + +if TYPE_CHECKING: + from app.core.config import Settings + + +class PaperlessClient: + def __init__(self, base_url: str, token: str) -> None: + self._base = base_url.rstrip("/") + self._headers = {"Authorization": f"Token {token}"} + + def _get(self, path: str, **params) -> dict: + r = httpx.get(f"{self._base}{path}", headers=self._headers, params=params, timeout=30) + r.raise_for_status() + return r.json() + + def _post(self, path: str, **kwargs) -> dict: + r = httpx.post(f"{self._base}{path}", headers=self._headers, timeout=30, **kwargs) + r.raise_for_status() + return r.json() + + def _delete(self, path: str) -> None: + r = httpx.delete(f"{self._base}{path}", headers=self._headers, timeout=30) + r.raise_for_status() + + def upload_document(self, pdf_bytes: bytes, title: str, tags: list[int] | None = None) -> int: + """Upload PDF to paperless and return the created document_id (polls until task completes).""" + files = {"document": (f"{title}.pdf", pdf_bytes, "application/pdf")} + data: dict = {"title": title} + if tags: + data["tags"] = tags + resp = self._post("/api/documents/post_document/", files=files, data=data) + task_id = resp if isinstance(resp, str) else resp.get("task_id", resp) + return self._poll_task(str(task_id)) + + def _poll_task(self, task_id: str, max_wait: int = 60) -> int: + delay = 2 + elapsed = 0 + while elapsed < max_wait: + time.sleep(delay) + elapsed += delay + result = self._get("/api/tasks/", task_id=task_id) + tasks = result if isinstance(result, list) else result.get("results", []) + if not tasks: + delay = min(delay * 2, 10) + continue + task = tasks[0] + if task.get("status") == "SUCCESS": + return int(task["related_document"]) + if task.get("status") in ("FAILURE", "REVOKED"): + raise RuntimeError(f"Paperless task {task_id} failed: {task.get('result')}") + delay = min(delay * 2, 10) + raise TimeoutError(f"Paperless task {task_id} did not complete within {max_wait}s") + + def create_share_link( + self, document_id: int, expiration: datetime | None = None + ) -> tuple[int, str]: + """Create a share link for document_id. Returns (share_link_id, full share URL).""" + payload: dict = {"document": document_id} + if expiration: + payload["expiration_date"] = expiration.isoformat() + resp = self._post("/api/share_links/", json=payload) + slug = resp["slug"] + link_id = int(resp["id"]) + return link_id, f"{self._base}/share/{slug}" + + def get_share_links(self, document_id: int) -> list[dict]: + return self._get(f"/api/documents/{document_id}/share_links/").get("results", []) + + def delete_share_link(self, share_link_id: int) -> None: + self._delete(f"/api/share_links/{share_link_id}/") + + def delete_document(self, document_id: int) -> None: + self._delete(f"/api/documents/{document_id}/") + + +def get_paperless_client(settings: "Settings") -> PaperlessClient | None: + if not settings.paperless_enabled: + return None + if not settings.paperless_base_url or not settings.paperless_token: + return None + return PaperlessClient(settings.paperless_base_url, settings.paperless_token)