Add paperless-ngx integration for document storage and share links

- dlib/integrations/paperless.py: sync HTTP client wrapping the paperless-ngx
  REST API (upload doc, poll task, create/delete share links, delete document)
- config: PAPERLESS_ENABLED, PAPERLESS_BASE_URL, PAPERLESS_TOKEN, PAPERLESS_TAG_IDS
- PublicAsset model: paperless_document_id + paperless_share_slug columns
- publication service: after creating the asset, if paperless is enabled upload
  the patched PDF and create a share link; stores doc id + share slug on the asset
- public routes: pass expires_at through to publish_version; new
  POST /{slug}/share-links endpoint to (re)create expiring share links on demand
- schemas: PublishRequest.expires_at, PublicAssetResponse.paperless_share_url,
  new ShareLinkRequest model
- frontend: paperless_share_url field on PublicAsset type, createShareLink()
  and expiresAt param on publishVersion() in api.ts
- .env.example: documented paperless env vars

https://claude.ai/code/session_01YPVs6uBwCvcwVMvrfLBBdu
This commit is contained in:
Claude
2026-04-09 09:27:26 +00:00
parent 61430317f4
commit f5621f120f
11 changed files with 214 additions and 18 deletions

View File

@@ -57,6 +57,17 @@ AUTHENTIK_CLIENT_SECRET=
# Leave blank to use the built-in rule-based tailoring instead of Claude. # Leave blank to use the built-in rule-based tailoring instead of Claude.
ANTHROPIC_API_KEY= ANTHROPIC_API_KEY=
# ── Paperless-ngx integration (optional) ─────────────────────────────────────
# When enabled, published CVs are uploaded to your paperless-ngx instance and
# shared via paperless share links (with optional expiry). MinIO is still used
# for DOCX artifact storage; paperless handles the published PDF + sharing.
PAPERLESS_ENABLED=false
PAPERLESS_BASE_URL=http://localhost:8000
# API token — obtain via POST /api/token/ with your paperless credentials.
PAPERLESS_TOKEN=
# Comma-separated tag IDs to apply to uploaded CV documents (optional).
PAPERLESS_TAG_IDS=
# ── Demo mode ───────────────────────────────────────────────────────────────── # ── Demo mode ─────────────────────────────────────────────────────────────────
# Set to true to enable standalone demo mode in the webapp. # Set to true to enable standalone demo mode in the webapp.
# Demo mode uses hardcoded dummy data — no backend or DB required. # Demo mode uses hardcoded dummy data — no backend or DB required.

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import hashlib import hashlib
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -16,11 +17,13 @@ from app.schemas import (
PublicAssetLookupResponse, PublicAssetLookupResponse,
PublicAssetResponse, PublicAssetResponse,
PublishRequest, PublishRequest,
ShareLinkRequest,
) )
from app.services.publication import publish_version from app.services.publication import publish_version
from app.services.storage import storage_client from app.services.storage import storage_client
from dlib.auth import AuthenticatedUser from dlib.auth import AuthenticatedUser
from dlib.cv import docx_bytes_to_pdf, generate_patched_docx from dlib.cv import docx_bytes_to_pdf, generate_patched_docx
from dlib.integrations.paperless import get_paperless_client
router = APIRouter(prefix="/public", tags=["public"]) router = APIRouter(prefix="/public", tags=["public"])
@@ -48,6 +51,18 @@ async def _get_public_asset(session: AsyncSession, slug: str) -> PublicAsset:
return asset return asset
async def _assert_owner(session: AsyncSession, asset: PublicAsset, owner_id: str) -> None:
    """Raise a 403 unless ``owner_id`` owns the CV document behind ``asset``.

    Ownership is resolved through the asset's version: a ``CvVersion`` with the
    asset's ``version_id`` must exist and be joined to a document whose
    ``owner_id`` matches. Assets without a ``version_id`` are always rejected.
    """
    if asset.version_id:
        owned_version_query = (
            select(CvVersion)
            .join(CvVersion.document)
            .where(CvVersion.id == asset.version_id, CvDocument.owner_id == owner_id)
        )
        result = await session.execute(owned_version_query)
        if result.scalars().one_or_none():
            return
    # Every non-owner path (no version, unknown version, foreign owner) ends here.
    raise HTTPException(status_code=403, detail="Not authorized")
@router.post("/publish", response_model=PublicAssetResponse) @router.post("/publish", response_model=PublicAssetResponse)
async def publish( async def publish(
payload: PublishRequest, payload: PublishRequest,
@@ -60,12 +75,40 @@ async def publish(
version_id=payload.version_id, version_id=payload.version_id,
submission_id=payload.submission_id, submission_id=payload.submission_id,
slug=payload.slug, slug=payload.slug,
expires_at=payload.expires_at,
) )
if not asset: if not asset:
raise HTTPException(status_code=404, detail="Version or submission not found") raise HTTPException(status_code=404, detail="Version or submission not found")
return _response_from_asset(asset) return _response_from_asset(asset)
@router.post("/{slug}/share-links", response_model=PublicAssetResponse)
async def create_share_link(
    slug: str,
    payload: ShareLinkRequest,
    session: AsyncSession = Depends(get_db),
    user: AuthenticatedUser = Depends(get_current_user),
):
    """Create a new paperless share link for an already-published asset.

    Responds with 403 when the caller does not own the asset, 409 when the
    asset has no paperless document id, and 503 when no paperless client can
    be built from the current settings. On success the asset's stored share
    slug is overwritten with the new link's slug and the asset is returned.
    """
    asset = await _get_public_asset(session, slug)
    await _assert_owner(session, asset, user.sub)

    document_id = asset.paperless_document_id
    if not document_id:
        raise HTTPException(status_code=409, detail="Asset not synced to paperless")

    client = get_paperless_client(get_settings())
    if client is None:
        raise HTTPException(status_code=503, detail="Paperless integration not enabled")

    # The paperless client is synchronous, so run it in a worker thread.
    _link_id, share_url = await asyncio.to_thread(
        client.create_share_link, document_id, payload.expiration_date
    )

    # Only the slug is persisted; the full URL is rebuilt when responding.
    asset.paperless_share_slug = share_url.rpartition("/share/")[2]
    await session.commit()
    await session.refresh(asset)
    return _response_from_asset(asset)
@router.get("/{slug}/analytics", response_model=PublicAssetAnalyticsResponse) @router.get("/{slug}/analytics", response_model=PublicAssetAnalyticsResponse)
async def get_analytics( async def get_analytics(
slug: str, slug: str,
@@ -73,17 +116,7 @@ async def get_analytics(
user: AuthenticatedUser = Depends(get_current_user), user: AuthenticatedUser = Depends(get_current_user),
): ):
asset = await _get_public_asset(session, slug) asset = await _get_public_asset(session, slug)
await _assert_owner(session, asset, user.sub)
if asset.version_id:
stmt = (
select(CvVersion)
.join(CvVersion.document)
.where(CvVersion.id == asset.version_id, CvDocument.owner_id == user.sub)
)
if not (await session.execute(stmt)).scalars().one_or_none():
raise HTTPException(status_code=403, detail="Not authorized")
else:
raise HTTPException(status_code=403, detail="Not authorized")
view_count = ( view_count = (
await session.execute( await session.execute(
@@ -137,7 +170,11 @@ async def get_public_asset(slug: str, request: Request, session: AsyncSession =
def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse: def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse:
settings = get_settings() settings = get_settings()
base = settings.public_base_url.rstrip("/") base = settings.public_base_url.rstrip("/")
url = f"{base}/cv/{asset.slug}" paperless_share_url = (
f"{settings.paperless_base_url}/share/{asset.paperless_share_slug}"
if settings.paperless_base_url and asset.paperless_share_slug
else None
)
return PublicAssetResponse( return PublicAssetResponse(
id=asset.id, id=asset.id,
slug=asset.slug, slug=asset.slug,
@@ -146,5 +183,6 @@ def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse:
created_at=asset.created_at, created_at=asset.created_at,
version_id=asset.version_id, version_id=asset.version_id,
submission_id=asset.submission_id, submission_id=asset.submission_id,
url=url, url=f"{base}/cv/{asset.slug}",
paperless_share_url=paperless_share_url,
) )

View File

@@ -47,6 +47,11 @@ class Settings(BaseSettings):
) )
publish_domain: str = Field(default="cv.alves.world", alias="CV_PUBLIC_DOMAIN") publish_domain: str = Field(default="cv.alves.world", alias="CV_PUBLIC_DOMAIN")
# Paperless-ngx integration (optional). A client is only built when this flag
# is on AND both the base URL and the token are set.
paperless_enabled: bool = Field(default=False, alias="PAPERLESS_ENABLED")
# Root URL of the paperless-ngx instance; also used to build share URLs.
paperless_base_url: str | None = Field(default=None, alias="PAPERLESS_BASE_URL")
# API token, sent to paperless as "Authorization: Token <token>".
paperless_token: str | None = Field(default=None, alias="PAPERLESS_TOKEN")
# Tag IDs applied to uploaded documents; a comma-separated string is accepted.
paperless_tag_ids: list[int] = Field(default_factory=list, alias="PAPERLESS_TAG_IDS")
class Config: class Config:
env_file = ".env" env_file = ".env"
extra = "ignore" extra = "ignore"
@@ -67,13 +72,20 @@ class Settings(BaseSettings):
return [origin.strip() for origin in value.split(",") if origin.strip()] return [origin.strip() for origin in value.split(",") if origin.strip()]
return value return value
@field_validator("storage_endpoint_url", mode="before") @field_validator("storage_endpoint_url", "paperless_base_url", "paperless_token", mode="before")
@classmethod @classmethod
def _empty_endpoint_to_none(cls, value): def _empty_str_to_none(cls, value):
if isinstance(value, str) and not value.strip(): if isinstance(value, str) and not value.strip():
return None return None
return value return value
@field_validator("paperless_tag_ids", mode="before")
@classmethod
def _parse_tag_ids(cls, value):
    """Convert a comma-separated string of tag IDs into a list of ints.

    Blank entries (including an entirely empty string) are skipped. Values
    that are not strings are returned unchanged for normal validation.
    """
    if not isinstance(value, str):
        return value
    pieces = (piece.strip() for piece in value.split(","))
    return [int(piece) for piece in pieces if piece]
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def get_settings() -> Settings: def get_settings() -> Settings:

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import enum import enum
from datetime import datetime, timezone from datetime import datetime, timezone
from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, String, Text from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
@@ -148,6 +148,8 @@ class PublicAsset(Base, IdentifierMixin, TimestampMixin):
expires_at: Mapped[str | None] = mapped_column( expires_at: Mapped[str | None] = mapped_column(
DateTime(timezone=True), nullable=True DateTime(timezone=True), nullable=True
) )
# ID of the paperless-ngx document uploaded for this asset; NULL when the
# asset has not been synced to paperless.
paperless_document_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
# Slug of the current paperless share link (the part after "/share/" in the
# share URL); NULL when no share link has been created.
paperless_share_slug: Mapped[str | None] = mapped_column(String(160), nullable=True)
submission: Mapped[Submission | None] = relationship( submission: Mapped[Submission | None] = relationship(
"Submission", back_populates="public_asset" "Submission", back_populates="public_asset"

View File

@@ -9,6 +9,7 @@ from .cv import (
PublicAssetLookupResponse, PublicAssetLookupResponse,
PublicAssetResponse, PublicAssetResponse,
PublishRequest, PublishRequest,
ShareLinkRequest,
SubmissionCreateRequest, SubmissionCreateRequest,
SubmissionResponse, SubmissionResponse,
SubmissionStatusUpdateRequest, SubmissionStatusUpdateRequest,
@@ -31,6 +32,7 @@ __all__ = [
"SuggestionResponse", "SuggestionResponse",
"SuggestionUpdateRequest", "SuggestionUpdateRequest",
"PublishRequest", "PublishRequest",
"ShareLinkRequest",
"PublicAssetResponse", "PublicAssetResponse",
"PublicAssetLookupResponse", "PublicAssetLookupResponse",
"PublicAssetAnalyticsResponse", "PublicAssetAnalyticsResponse",

View File

@@ -121,6 +121,11 @@ class PublishRequest(BaseModel):
version_id: str | None = None version_id: str | None = None
submission_id: str | None = None submission_id: str | None = None
slug: str | None = None slug: str | None = None
expires_at: datetime | None = None
class ShareLinkRequest(BaseModel):
    """Request body for ``POST /public/{slug}/share-links``."""

    # When the new share link should expire; None requests a link with no expiry.
    expiration_date: datetime | None = None
class PublicAssetResponse(BaseModel): class PublicAssetResponse(BaseModel):
@@ -134,6 +139,7 @@ class PublicAssetResponse(BaseModel):
version_id: str | None = None version_id: str | None = None
submission_id: str | None = None submission_id: str | None = None
url: str | None = None url: str | None = None
paperless_share_url: str | None = None
class PublicAssetLookupResponse(BaseModel): class PublicAssetLookupResponse(BaseModel):

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import re import re
from datetime import datetime from datetime import datetime
from uuid import uuid4 from uuid import uuid4
@@ -7,7 +8,11 @@ from uuid import uuid4
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import get_settings
from app.models import CvDocument, CvVersion, PublicAsset, Submission from app.models import CvDocument, CvVersion, PublicAsset, Submission
from app.services.storage import storage_client
from dlib.cv import docx_bytes_to_pdf, generate_patched_docx
from dlib.integrations.paperless import get_paperless_client
async def publish_version( async def publish_version(
@@ -17,6 +22,7 @@ async def publish_version(
version_id: str | None, version_id: str | None,
submission_id: str | None, submission_id: str | None,
slug: str | None, slug: str | None,
expires_at: datetime | None = None,
) -> PublicAsset | None: ) -> PublicAsset | None:
target_version: CvVersion | None = None target_version: CvVersion | None = None
target_submission: Submission | None = None target_submission: Submission | None = None
@@ -55,11 +61,27 @@ async def publish_version(
slug=resolved_slug, slug=resolved_slug,
artifact_key=target_version.artifact_docx_key, artifact_key=target_version.artifact_docx_key,
is_public=True, is_public=True,
expires_at=None, expires_at=expires_at,
) )
session.add(asset) session.add(asset)
await session.commit() await session.commit()
await session.refresh(asset) await session.refresh(asset)
settings = get_settings()
client = get_paperless_client(settings)
if client:
docx = storage_client.download_bytes(target_version.artifact_docx_key)
blocks = target_version.structured_blocks or []
pdf = docx_bytes_to_pdf(generate_patched_docx(docx, blocks))
doc_id = await asyncio.to_thread(
client.upload_document, pdf, resolved_slug, settings.paperless_tag_ids or []
)
_, share_url = await asyncio.to_thread(client.create_share_link, doc_id, expires_at)
asset.paperless_document_id = doc_id
asset.paperless_share_slug = share_url.split("/share/")[-1]
await session.commit()
await session.refresh(asset)
return asset return asset

View File

@@ -80,6 +80,7 @@ export type PublicAsset = {
version_id?: string | null; version_id?: string | null;
submission_id?: string | null; submission_id?: string | null;
created_at: string; created_at: string;
paperless_share_url?: string | null;
}; };
export type PublicAssetAnalytics = { export type PublicAssetAnalytics = {
@@ -229,11 +230,23 @@ export async function publishVersion(
versionId?: string | null, versionId?: string | null,
submissionId?: string | null, submissionId?: string | null,
slug?: string | null, slug?: string | null,
expiresAt?: string | null,
): Promise<PublicAsset> { ): Promise<PublicAsset> {
return req<PublicAsset>('/api/v1/public/publish', { return req<PublicAsset>('/api/v1/public/publish', {
method: 'POST', method: 'POST',
headers: { 'content-type': 'application/json' }, headers: { 'content-type': 'application/json' },
body: JSON.stringify({ version_id: versionId ?? null, submission_id: submissionId ?? null, slug: slug ?? null }), body: JSON.stringify({ version_id: versionId ?? null, submission_id: submissionId ?? null, slug: slug ?? null, expires_at: expiresAt ?? null }),
});
}
export async function createShareLink(
slug: string,
expirationDate?: string | null,
): Promise<PublicAsset> {
return req<PublicAsset>(`/api/v1/public/${encodeURIComponent(slug)}/share-links`, {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ expiration_date: expirationDate ?? null }),
}); });
} }

View File

@@ -64,6 +64,8 @@ export interface PublicAsset {
isPublic: boolean; isPublic: boolean;
expiresAt?: string; expiresAt?: string;
viewCount: number; viewCount: number;
url?: string | null;
paperlessShareUrl?: string | null;
} }
export interface AISuggestion { export interface AISuggestion {

View File

View File

@@ -0,0 +1,88 @@
from __future__ import annotations
import time
from datetime import datetime
from typing import TYPE_CHECKING
import httpx
if TYPE_CHECKING:
from app.core.config import Settings
class PaperlessClient:
    """Minimal synchronous client for the paperless-ngx REST API.

    Every method blocks on network I/O, and ``upload_document`` additionally
    sleeps while polling, so async callers should run these methods in a
    worker thread. Non-2xx responses raise ``httpx.HTTPStatusError``.
    """

    def __init__(self, base_url: str, token: str) -> None:
        # Strip trailing slashes so paths can always be appended as "/api/...".
        self._base = base_url.rstrip("/")
        self._headers = {"Authorization": f"Token {token}"}

    def _get(self, path: str, **params):
        """GET ``path`` with ``params`` as the query string; return decoded JSON."""
        r = httpx.get(f"{self._base}{path}", headers=self._headers, params=params, timeout=30)
        r.raise_for_status()
        return r.json()

    def _post(self, path: str, **kwargs):
        """POST to ``path``, forwarding ``kwargs`` to httpx; return decoded JSON."""
        r = httpx.post(f"{self._base}{path}", headers=self._headers, timeout=30, **kwargs)
        r.raise_for_status()
        return r.json()

    def _delete(self, path: str) -> None:
        """DELETE ``path``; the response body is ignored."""
        r = httpx.delete(f"{self._base}{path}", headers=self._headers, timeout=30)
        r.raise_for_status()

    @staticmethod
    def _rows(payload) -> list[dict]:
        """Return the items of a list response, whether paginated or a bare list."""
        if isinstance(payload, list):
            return payload
        return payload.get("results", [])

    def upload_document(self, pdf_bytes: bytes, title: str, tags: list[int] | None = None) -> int:
        """Upload a PDF and return the id of the document paperless created.

        The upload only queues a consumption task, so this blocks and polls
        that task until it finishes.

        Raises:
            RuntimeError: the task failed, was revoked, or produced no document.
            TimeoutError: the task did not finish within the polling window.
        """
        files = {"document": (f"{title}.pdf", pdf_bytes, "application/pdf")}
        data: dict = {"title": title}
        if tags:
            data["tags"] = tags
        resp = self._post("/api/documents/post_document/", files=files, data=data)
        # The response may be a bare task id string or an object wrapping it.
        task_id = resp if isinstance(resp, str) else resp.get("task_id", resp)
        return self._poll_task(str(task_id))

    def _poll_task(self, task_id: str, max_wait: int = 60) -> int:
        """Poll ``/api/tasks/`` until ``task_id`` finishes; return its document id.

        Waits 2s before the first check and doubles the wait after each
        unfinished check, capped at 10s, until ``max_wait`` seconds of sleep
        have accumulated.
        """
        delay = 2
        elapsed = 0
        while elapsed < max_wait:
            time.sleep(delay)
            elapsed += delay
            tasks = self._rows(self._get("/api/tasks/", task_id=task_id))
            if tasks:
                task = tasks[0]
                status = task.get("status")
                if status == "SUCCESS":
                    document_id = task.get("related_document")
                    if document_id is None:
                        # Fail with a clear message instead of int(None).
                        raise RuntimeError(
                            f"Paperless task {task_id} succeeded without a related document"
                        )
                    return int(document_id)
                if status in ("FAILURE", "REVOKED"):
                    raise RuntimeError(f"Paperless task {task_id} failed: {task.get('result')}")
            # Task not visible yet or still running: back off and retry.
            delay = min(delay * 2, 10)
        raise TimeoutError(f"Paperless task {task_id} did not complete within {max_wait}s")

    def create_share_link(
        self, document_id: int, expiration: datetime | None = None
    ) -> tuple[int, str]:
        """Create a share link for ``document_id``.

        Returns ``(share_link_id, full share URL)``. When ``expiration`` is
        None no expiry is sent.
        """
        payload: dict = {"document": document_id}
        if expiration is not None:
            # The paperless-ngx share link field is named "expiration"; a
            # differently named key would not set the expiry.
            payload["expiration"] = expiration.isoformat()
        resp = self._post("/api/share_links/", json=payload)
        slug = resp["slug"]
        link_id = int(resp["id"])
        return link_id, f"{self._base}/share/{slug}"

    def get_share_links(self, document_id: int) -> list[dict]:
        """Return the share links of ``document_id`` (bare list or paginated)."""
        return self._rows(self._get(f"/api/documents/{document_id}/share_links/"))

    def delete_share_link(self, share_link_id: int) -> None:
        """Delete the share link with id ``share_link_id``."""
        self._delete(f"/api/share_links/{share_link_id}/")

    def delete_document(self, document_id: int) -> None:
        """Delete the document with id ``document_id``."""
        self._delete(f"/api/documents/{document_id}/")
def get_paperless_client(settings: "Settings") -> PaperlessClient | None:
    """Build a ``PaperlessClient`` from ``settings``, or return None.

    None is returned when the integration is disabled or when either the
    base URL or the API token is missing.
    """
    # Short-circuiting keeps the original check order: flag, then URL, then token.
    configured = (
        settings.paperless_enabled
        and settings.paperless_base_url
        and settings.paperless_token
    )
    if configured:
        return PaperlessClient(settings.paperless_base_url, settings.paperless_token)
    return None