mirror of
https://github.com/velocitatem/cvfs.git
synced 2026-05-31 08:43:37 +00:00
feat: add mobile support, delete CV/branch, and fix DOCX export with patches
Agent-Logs-Url: https://github.com/velocitatem/cvfs/sessions/4d754ed6-7f63-44e0-8689-123d7a70595f Co-authored-by: velocitatem <60182044+velocitatem@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
300a577fbe
commit
5d815cd24d
@@ -8,6 +8,7 @@ from .schema import (
|
||||
from .parser import parse_docx_bytes, summarize_keywords
|
||||
from .patcher import apply_patchset
|
||||
from .ats_guard import validate_patchset
|
||||
from .docx_export import generate_patched_docx
|
||||
|
||||
__all__ = [
|
||||
"StructuredBlock",
|
||||
@@ -19,4 +20,5 @@ __all__ = [
|
||||
"summarize_keywords",
|
||||
"apply_patchset",
|
||||
"validate_patchset",
|
||||
"generate_patched_docx",
|
||||
]
|
||||
|
||||
76
dlib/cv/docx_export.py
Normal file
76
dlib/cv/docx_export.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from io import BytesIO
|
||||
|
||||
from docx import Document
|
||||
|
||||
from .parser import _detect_block_type
|
||||
|
||||
|
||||
def _path_to_para_map(doc: Document) -> dict[str, int]:
    """Map block paths to positions in ``doc.paragraphs``.

    A path has the form ``"<block_type>[<n>]"``, where ``<block_type>`` is
    whatever ``_detect_block_type`` returns for the paragraph and ``<n>`` is a
    1-based running count of non-blank paragraphs of that type. Paragraphs
    whose text is empty or whitespace-only get no path, but they still occupy
    a position, so the stored values index the full paragraph list.
    """
    seen: defaultdict[str, int] = defaultdict(int)
    mapping: dict[str, int] = {}
    for position, paragraph in enumerate(doc.paragraphs):
        if paragraph.text.strip():
            style_name = getattr(paragraph.style, "name", None)
            kind = _detect_block_type(style_name, paragraph)
            seen[kind] += 1
            mapping[f"{kind}[{seen[kind]}]"] = position
    return mapping
|
||||
|
||||
|
||||
def _replace_para_text(para, new_text: str) -> None:
    """Set the paragraph's text to ``new_text``, reusing its first run.

    The first run receives the new text, so its character formatting is
    kept; every later run is blanked rather than deleted. A paragraph with no
    runs gets a single new run holding the text.
    """
    runs = para.runs
    if runs:
        head, *tail = runs
        for extra in tail:
            extra.text = ""
        head.text = new_text
    else:
        para.add_run(new_text)
|
||||
|
||||
|
||||
def _remove_paragraph(paragraph) -> None:
    """Detach the paragraph's underlying element from its parent node."""
    element = paragraph._element
    parent = element.getparent()
    parent.remove(element)
|
||||
|
||||
|
||||
def generate_patched_docx(
    original_bytes: bytes, structured_blocks: list[dict]
) -> bytes:
    """Return DOCX bytes with the text of ``structured_blocks`` applied.

    Args:
        original_bytes: Contents of the source ``.docx`` file.
        structured_blocks: Mappings that each carry a ``"path"`` key (in the
            ``"<block_type>[<n>]"`` form produced by ``_path_to_para_map``)
            and a ``"text"`` key. If a path occurs more than once, the last
            entry wins.

    Returns:
        The serialized document after patching. A paragraph whose stripped
        text differs from its block's text is rewritten; a non-blank paragraph
        whose path is missing from ``structured_blocks`` is removed. Block
        paths that match no paragraph are ignored.

        When ``structured_blocks`` is empty, ``original_bytes`` is returned
        unchanged and nothing is removed.

    Raises:
        KeyError: If a block lacks the ``"path"`` or ``"text"`` key.
    """
    if not structured_blocks:
        return original_bytes

    doc = Document(BytesIO(original_bytes))
    path_map = _path_to_para_map(doc)

    # Snapshot the paragraph list once. ``doc.paragraphs`` rebuilds its list
    # of proxy objects on every access, so indexing it inside the loops made
    # them quadratic. The snapshot is in the same order ``_path_to_para_map``
    # enumerated, and each proxy keeps pointing at its own element, so the
    # stored indices stay valid even while paragraphs are being removed.
    paragraphs = doc.paragraphs

    patched = {block["path"]: block["text"] for block in structured_blocks}

    for path, new_text in patched.items():
        idx = path_map.get(path)
        if idx is None:
            continue
        para = paragraphs[idx]
        if para.text.strip() != new_text:
            _replace_para_text(para, new_text)

    # Paths known to the document but absent from the patched set are dropped.
    # Removal order is irrelevant because we index into the snapshot.
    for path in path_map.keys() - patched.keys():
        _remove_paragraph(paragraphs[path_map[path]])

    out = BytesIO()
    doc.save(out)
    return out.getvalue()
|
||||
Reference in New Issue
Block a user