mirror of
https://github.com/velocitatem/cvfs.git
synced 2026-05-31 08:43:37 +00:00
Add paperless-ngx integration for document storage and share links
- dlib/integrations/paperless.py: sync HTTP client wrapping the paperless-ngx
REST API (upload doc, poll task, create/delete share links, delete document)
- config: PAPERLESS_ENABLED, PAPERLESS_BASE_URL, PAPERLESS_TOKEN, PAPERLESS_TAG_IDS
- PublicAsset model: paperless_document_id + paperless_share_slug columns
- publication service: after creating the asset, if paperless is enabled upload
the patched PDF and create a share link; stores doc id + share slug on the asset
- public routes: pass expires_at through to publish_version; new
POST /{slug}/share-links endpoint to (re)create expiring share links on demand
- schemas: PublishRequest.expires_at, PublicAssetResponse.paperless_share_url,
new ShareLinkRequest model
- frontend: paperless_share_url field on PublicAsset type, createShareLink()
and expiresAt param on publishVersion() in api.ts
- .env.example: documented paperless env vars
https://claude.ai/code/session_01YPVs6uBwCvcwVMvrfLBBdu
This commit is contained in:
11
.env.example
11
.env.example
@@ -57,6 +57,17 @@ AUTHENTIK_CLIENT_SECRET=
|
||||
# Leave blank to use the built-in rule-based tailoring instead of Claude.
|
||||
ANTHROPIC_API_KEY=
|
||||
|
||||
# ── Paperless-ngx integration (optional) ─────────────────────────────────────
|
||||
# When enabled, published CVs are uploaded to your paperless-ngx instance and
|
||||
# shared via paperless share links (with optional expiry). MinIO is still used
|
||||
# for DOCX artifact storage; paperless handles the published PDF + sharing.
|
||||
PAPERLESS_ENABLED=false
|
||||
PAPERLESS_BASE_URL=http://localhost:8000
|
||||
# API token — obtain via POST /api/token/ with your paperless credentials.
|
||||
PAPERLESS_TOKEN=
|
||||
# Comma-separated tag IDs to apply to uploaded CV documents (optional).
|
||||
PAPERLESS_TAG_IDS=
|
||||
|
||||
# ── Demo mode ─────────────────────────────────────────────────────────────────
|
||||
# Set to true to enable standalone demo mode in the webapp.
|
||||
# Demo mode uses hardcoded dummy data — no backend or DB required.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
|
||||
@@ -16,11 +17,13 @@ from app.schemas import (
|
||||
PublicAssetLookupResponse,
|
||||
PublicAssetResponse,
|
||||
PublishRequest,
|
||||
ShareLinkRequest,
|
||||
)
|
||||
from app.services.publication import publish_version
|
||||
from app.services.storage import storage_client
|
||||
from dlib.auth import AuthenticatedUser
|
||||
from dlib.cv import docx_bytes_to_pdf, generate_patched_docx
|
||||
from dlib.integrations.paperless import get_paperless_client
|
||||
|
||||
|
||||
router = APIRouter(prefix="/public", tags=["public"])
|
||||
@@ -48,6 +51,18 @@ async def _get_public_asset(session: AsyncSession, slug: str) -> PublicAsset:
|
||||
return asset
|
||||
|
||||
|
||||
async def _assert_owner(session: AsyncSession, asset: PublicAsset, owner_id: str) -> None:
    """Raise HTTP 403 unless ``owner_id`` owns the CV document behind ``asset``.

    An asset without a ``version_id`` cannot be traced to a document, so it is
    rejected as well.
    """
    if asset.version_id:
        # Match the asset's version only when its parent document has this owner.
        ownership_query = (
            select(CvVersion)
            .join(CvVersion.document)
            .where(CvVersion.id == asset.version_id, CvDocument.owner_id == owner_id)
        )
        result = await session.execute(ownership_query)
        if result.scalars().one_or_none():
            return
    raise HTTPException(status_code=403, detail="Not authorized")
|
||||
|
||||
|
||||
@router.post("/publish", response_model=PublicAssetResponse)
|
||||
async def publish(
|
||||
payload: PublishRequest,
|
||||
@@ -60,12 +75,40 @@ async def publish(
|
||||
version_id=payload.version_id,
|
||||
submission_id=payload.submission_id,
|
||||
slug=payload.slug,
|
||||
expires_at=payload.expires_at,
|
||||
)
|
||||
if not asset:
|
||||
raise HTTPException(status_code=404, detail="Version or submission not found")
|
||||
return _response_from_asset(asset)
|
||||
|
||||
|
||||
@router.post("/{slug}/share-links", response_model=PublicAssetResponse)
async def create_share_link(
    slug: str,
    payload: ShareLinkRequest,
    session: AsyncSession = Depends(get_db),
    user: AuthenticatedUser = Depends(get_current_user),
):
    # Create a fresh paperless share link for an already-published asset and
    # record its slug on the asset. Only the owner of the asset may do this.
    asset = await _get_public_asset(session, slug)
    await _assert_owner(session, asset, user.sub)

    # 409: the asset exists but was never uploaded to paperless.
    document_id = asset.paperless_document_id
    if not document_id:
        raise HTTPException(status_code=409, detail="Asset not synced to paperless")

    # 503: the integration is switched off or not fully configured.
    paperless = get_paperless_client(get_settings())
    if not paperless:
        raise HTTPException(status_code=503, detail="Paperless integration not enabled")

    # The paperless client is synchronous, so keep it off the event loop.
    _link_id, link_url = await asyncio.to_thread(
        paperless.create_share_link,
        document_id,
        payload.expiration_date,
    )

    # Only the trailing slug is stored; the full URL is rebuilt for responses.
    asset.paperless_share_slug = link_url.split("/share/")[-1]
    await session.commit()
    await session.refresh(asset)
    return _response_from_asset(asset)
|
||||
|
||||
|
||||
@router.get("/{slug}/analytics", response_model=PublicAssetAnalyticsResponse)
|
||||
async def get_analytics(
|
||||
slug: str,
|
||||
@@ -73,17 +116,7 @@ async def get_analytics(
|
||||
user: AuthenticatedUser = Depends(get_current_user),
|
||||
):
|
||||
asset = await _get_public_asset(session, slug)
|
||||
|
||||
if asset.version_id:
|
||||
stmt = (
|
||||
select(CvVersion)
|
||||
.join(CvVersion.document)
|
||||
.where(CvVersion.id == asset.version_id, CvDocument.owner_id == user.sub)
|
||||
)
|
||||
if not (await session.execute(stmt)).scalars().one_or_none():
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
else:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
await _assert_owner(session, asset, user.sub)
|
||||
|
||||
view_count = (
|
||||
await session.execute(
|
||||
@@ -137,7 +170,11 @@ async def get_public_asset(slug: str, request: Request, session: AsyncSession =
|
||||
def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse:
|
||||
settings = get_settings()
|
||||
base = settings.public_base_url.rstrip("/")
|
||||
url = f"{base}/cv/{asset.slug}"
|
||||
paperless_share_url = (
|
||||
f"{settings.paperless_base_url}/share/{asset.paperless_share_slug}"
|
||||
if settings.paperless_base_url and asset.paperless_share_slug
|
||||
else None
|
||||
)
|
||||
return PublicAssetResponse(
|
||||
id=asset.id,
|
||||
slug=asset.slug,
|
||||
@@ -146,5 +183,6 @@ def _response_from_asset(asset: PublicAsset) -> PublicAssetResponse:
|
||||
created_at=asset.created_at,
|
||||
version_id=asset.version_id,
|
||||
submission_id=asset.submission_id,
|
||||
url=url,
|
||||
url=f"{base}/cv/{asset.slug}",
|
||||
paperless_share_url=paperless_share_url,
|
||||
)
|
||||
|
||||
@@ -47,6 +47,11 @@ class Settings(BaseSettings):
|
||||
)
|
||||
publish_domain: str = Field(default="cv.alves.world", alias="CV_PUBLIC_DOMAIN")
|
||||
|
||||
paperless_enabled: bool = Field(default=False, alias="PAPERLESS_ENABLED")
|
||||
paperless_base_url: str | None = Field(default=None, alias="PAPERLESS_BASE_URL")
|
||||
paperless_token: str | None = Field(default=None, alias="PAPERLESS_TOKEN")
|
||||
paperless_tag_ids: list[int] = Field(default_factory=list, alias="PAPERLESS_TAG_IDS")
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
extra = "ignore"
|
||||
@@ -67,13 +72,20 @@ class Settings(BaseSettings):
|
||||
return [origin.strip() for origin in value.split(",") if origin.strip()]
|
||||
return value
|
||||
|
||||
@field_validator("storage_endpoint_url", mode="before")
|
||||
@field_validator("storage_endpoint_url", "paperless_base_url", "paperless_token", mode="before")
|
||||
@classmethod
|
||||
def _empty_endpoint_to_none(cls, value):
|
||||
def _empty_str_to_none(cls, value):
|
||||
if isinstance(value, str) and not value.strip():
|
||||
return None
|
||||
return value
|
||||
|
||||
@field_validator("paperless_tag_ids", mode="before")
@classmethod
def _parse_tag_ids(cls, value):
    """Turn a comma-separated string of tag IDs into a list of ints.

    Blank entries are dropped; any non-string value is returned untouched.
    """
    if not isinstance(value, str):
        return value
    pieces = (piece.strip() for piece in value.split(","))
    return [int(piece) for piece in pieces if piece]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_settings() -> Settings:
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import enum
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, String, Text
|
||||
from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, Integer, String, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
@@ -148,6 +148,8 @@ class PublicAsset(Base, IdentifierMixin, TimestampMixin):
|
||||
expires_at: Mapped[str | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
)
|
||||
paperless_document_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
paperless_share_slug: Mapped[str | None] = mapped_column(String(160), nullable=True)
|
||||
|
||||
submission: Mapped[Submission | None] = relationship(
|
||||
"Submission", back_populates="public_asset"
|
||||
|
||||
@@ -9,6 +9,7 @@ from .cv import (
|
||||
PublicAssetLookupResponse,
|
||||
PublicAssetResponse,
|
||||
PublishRequest,
|
||||
ShareLinkRequest,
|
||||
SubmissionCreateRequest,
|
||||
SubmissionResponse,
|
||||
SubmissionStatusUpdateRequest,
|
||||
@@ -31,6 +32,7 @@ __all__ = [
|
||||
"SuggestionResponse",
|
||||
"SuggestionUpdateRequest",
|
||||
"PublishRequest",
|
||||
"ShareLinkRequest",
|
||||
"PublicAssetResponse",
|
||||
"PublicAssetLookupResponse",
|
||||
"PublicAssetAnalyticsResponse",
|
||||
|
||||
@@ -121,6 +121,11 @@ class PublishRequest(BaseModel):
|
||||
version_id: str | None = None
|
||||
submission_id: str | None = None
|
||||
slug: str | None = None
|
||||
expires_at: datetime | None = None
|
||||
|
||||
|
||||
# Request body accepted by the POST /{slug}/share-links route.
class ShareLinkRequest(BaseModel):
|
||||
# Optional expiry for the new share link; the route forwards it to the paperless client.
expiration_date: datetime | None = None
|
||||
|
||||
|
||||
class PublicAssetResponse(BaseModel):
|
||||
@@ -134,6 +139,7 @@ class PublicAssetResponse(BaseModel):
|
||||
version_id: str | None = None
|
||||
submission_id: str | None = None
|
||||
url: str | None = None
|
||||
paperless_share_url: str | None = None
|
||||
|
||||
|
||||
class PublicAssetLookupResponse(BaseModel):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
@@ -7,7 +8,11 @@ from uuid import uuid4
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.models import CvDocument, CvVersion, PublicAsset, Submission
|
||||
from app.services.storage import storage_client
|
||||
from dlib.cv import docx_bytes_to_pdf, generate_patched_docx
|
||||
from dlib.integrations.paperless import get_paperless_client
|
||||
|
||||
|
||||
async def publish_version(
|
||||
@@ -17,6 +22,7 @@ async def publish_version(
|
||||
version_id: str | None,
|
||||
submission_id: str | None,
|
||||
slug: str | None,
|
||||
expires_at: datetime | None = None,
|
||||
) -> PublicAsset | None:
|
||||
target_version: CvVersion | None = None
|
||||
target_submission: Submission | None = None
|
||||
@@ -55,11 +61,27 @@ async def publish_version(
|
||||
slug=resolved_slug,
|
||||
artifact_key=target_version.artifact_docx_key,
|
||||
is_public=True,
|
||||
expires_at=None,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
session.add(asset)
|
||||
await session.commit()
|
||||
await session.refresh(asset)
|
||||
|
||||
settings = get_settings()
|
||||
client = get_paperless_client(settings)
|
||||
if client:
|
||||
docx = storage_client.download_bytes(target_version.artifact_docx_key)
|
||||
blocks = target_version.structured_blocks or []
|
||||
pdf = docx_bytes_to_pdf(generate_patched_docx(docx, blocks))
|
||||
doc_id = await asyncio.to_thread(
|
||||
client.upload_document, pdf, resolved_slug, settings.paperless_tag_ids or []
|
||||
)
|
||||
_, share_url = await asyncio.to_thread(client.create_share_link, doc_id, expires_at)
|
||||
asset.paperless_document_id = doc_id
|
||||
asset.paperless_share_slug = share_url.split("/share/")[-1]
|
||||
await session.commit()
|
||||
await session.refresh(asset)
|
||||
|
||||
return asset
|
||||
|
||||
|
||||
|
||||
@@ -80,6 +80,7 @@ export type PublicAsset = {
|
||||
version_id?: string | null;
|
||||
submission_id?: string | null;
|
||||
created_at: string;
|
||||
paperless_share_url?: string | null;
|
||||
};
|
||||
|
||||
export type PublicAssetAnalytics = {
|
||||
@@ -229,11 +230,23 @@ export async function publishVersion(
|
||||
versionId?: string | null,
|
||||
submissionId?: string | null,
|
||||
slug?: string | null,
|
||||
expiresAt?: string | null,
|
||||
): Promise<PublicAsset> {
|
||||
return req<PublicAsset>('/api/v1/public/publish', {
|
||||
method: 'POST',
|
||||
headers: { 'content-type': 'application/json' },
|
||||
body: JSON.stringify({ version_id: versionId ?? null, submission_id: submissionId ?? null, slug: slug ?? null }),
|
||||
body: JSON.stringify({ version_id: versionId ?? null, submission_id: submissionId ?? null, slug: slug ?? null, expires_at: expiresAt ?? null }),
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Ask the backend to create a new paperless share link for a published asset.
 *
 * @param slug - Public slug of the asset; URL-encoded before use in the path.
 * @param expirationDate - Optional expiry timestamp; sent as `null` when omitted.
 * @returns The asset as returned by the backend after the link was created.
 */
export async function createShareLink(
  slug: string,
  expirationDate?: string | null,
): Promise<PublicAsset> {
  const endpoint = `/api/v1/public/${encodeURIComponent(slug)}/share-links`;
  const body = JSON.stringify({ expiration_date: expirationDate ?? null });
  return req<PublicAsset>(endpoint, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body,
  });
}
|
||||
|
||||
|
||||
@@ -64,6 +64,8 @@ export interface PublicAsset {
|
||||
isPublic: boolean;
|
||||
expiresAt?: string;
|
||||
viewCount: number;
|
||||
url?: string | null;
|
||||
paperlessShareUrl?: string | null;
|
||||
}
|
||||
|
||||
export interface AISuggestion {
|
||||
|
||||
0
dlib/integrations/__init__.py
Normal file
0
dlib/integrations/__init__.py
Normal file
88
dlib/integrations/paperless.py
Normal file
88
dlib/integrations/paperless.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.core.config import Settings
|
||||
|
||||
|
||||
class PaperlessClient:
    """Blocking HTTP client for the paperless-ngx REST endpoints this project uses.

    Every request is a synchronous ``httpx`` call with a 30-second timeout and a
    ``Token`` authorization header. Error statuses are raised by
    ``raise_for_status()``. Async callers should run these methods in a worker
    thread.
    """

    def __init__(self, base_url: str, token: str) -> None:
        """Store the API root (trailing slashes removed) and the auth header."""
        self._base = base_url.rstrip("/")
        self._headers = {"Authorization": f"Token {token}"}

    def _get(self, path: str, **params) -> dict:
        """GET ``path`` with ``params`` as the query string; return decoded JSON."""
        r = httpx.get(f"{self._base}{path}", headers=self._headers, params=params, timeout=30)
        r.raise_for_status()
        return r.json()

    def _post(self, path: str, **kwargs) -> dict:
        """POST to ``path``, forwarding ``kwargs`` to httpx; return decoded JSON."""
        r = httpx.post(f"{self._base}{path}", headers=self._headers, timeout=30, **kwargs)
        r.raise_for_status()
        return r.json()

    def _delete(self, path: str) -> None:
        """DELETE ``path``; the response body is ignored."""
        r = httpx.delete(f"{self._base}{path}", headers=self._headers, timeout=30)
        r.raise_for_status()

    def upload_document(self, pdf_bytes: bytes, title: str, tags: list[int] | None = None) -> int:
        """Upload a PDF and return the created document id.

        Blocks until the consumption task finishes (see ``_poll_task``).

        Args:
            pdf_bytes: Raw PDF content.
            title: Document title; also used as the uploaded file name.
            tags: Optional tag ids to attach; omitted from the form when empty.

        Raises:
            RuntimeError: The consumption task failed or yielded no document.
            TimeoutError: The task did not finish within the polling window.
        """
        files = {"document": (f"{title}.pdf", pdf_bytes, "application/pdf")}
        data: dict = {"title": title}
        if tags:
            data["tags"] = tags
        resp = self._post("/api/documents/post_document/", files=files, data=data)
        # The endpoint may answer with a bare task id string or an object holding it.
        task_id = resp if isinstance(resp, str) else resp.get("task_id", resp)
        return self._poll_task(str(task_id))

    def _poll_task(self, task_id: str, max_wait: int = 60) -> int:
        """Poll the task list until ``task_id`` succeeds; return its document id.

        Waits 2s before the first poll and doubles the delay up to 10s between
        polls, giving up once the accumulated delay reaches ``max_wait`` seconds.

        Raises:
            RuntimeError: The task reported FAILURE/REVOKED, or reported SUCCESS
                without a related document id.
            TimeoutError: The task did not complete within ``max_wait`` seconds.
        """
        delay = 2
        elapsed = 0
        while elapsed < max_wait:
            time.sleep(delay)
            elapsed += delay
            result = self._get("/api/tasks/", task_id=task_id)
            # Accept both a bare list and a paginated {"results": [...]} envelope.
            tasks = result if isinstance(result, list) else result.get("results", [])
            if tasks:
                task = tasks[0]
                status = task.get("status")
                if status == "SUCCESS":
                    document_id = task.get("related_document")
                    if document_id is None:
                        # Fail with a readable error instead of KeyError/TypeError.
                        raise RuntimeError(
                            f"Paperless task {task_id} succeeded without a related document"
                        )
                    return int(document_id)
                if status in ("FAILURE", "REVOKED"):
                    raise RuntimeError(f"Paperless task {task_id} failed: {task.get('result')}")
            delay = min(delay * 2, 10)
        raise TimeoutError(f"Paperless task {task_id} did not complete within {max_wait}s")

    def create_share_link(
        self, document_id: int, expiration: datetime | None = None
    ) -> tuple[int, str]:
        """Create a share link for ``document_id``.

        Args:
            document_id: Paperless document to share.
            expiration: Optional expiry, sent in ISO-8601 form; omitted when None.

        Returns:
            ``(share_link_id, full share URL)``.
        """
        payload: dict = {"document": document_id}
        if expiration:
            payload["expiration_date"] = expiration.isoformat()
        resp = self._post("/api/share_links/", json=payload)
        slug = resp["slug"]
        link_id = int(resp["id"])
        return link_id, f"{self._base}/share/{slug}"

    def get_share_links(self, document_id: int) -> list[dict]:
        """Return the share links that exist for ``document_id``."""
        result = self._get(f"/api/documents/{document_id}/share_links/")
        # Tolerate a bare JSON list as well as a paginated envelope, exactly as
        # _poll_task does; calling .get() on a list would raise AttributeError.
        if isinstance(result, list):
            return result
        return result.get("results", [])

    def delete_share_link(self, share_link_id: int) -> None:
        """Delete the share link with the given id."""
        self._delete(f"/api/share_links/{share_link_id}/")

    def delete_document(self, document_id: int) -> None:
        """Delete the document with the given id."""
        self._delete(f"/api/documents/{document_id}/")
|
||||
|
||||
|
||||
def get_paperless_client(settings: "Settings") -> PaperlessClient | None:
    """Build a ``PaperlessClient`` from ``settings``, or return None.

    None is returned when the integration is disabled, or when either the
    base URL or the API token is missing/empty.
    """
    base_url = settings.paperless_base_url
    token = settings.paperless_token
    if settings.paperless_enabled and base_url and token:
        return PaperlessClient(base_url, token)
    return None
|
||||
Reference in New Issue
Block a user