feat: NLP patch insights + standalone demo mode

- dlib/ai/insights.py: pure-Python NLP analysis that correlates accepted AI suggestion operations/keywords/sections with submission outcomes (pending_review / published = positive, archived = negative)
- Backend: GET /api/v1/insights route + service + Pydantic schema
- Frontend: InsightsPanel component with bar charts for operation impact, section impact, and keyword signal lift scores
- Insights tab added to the version panel; compact preview on doc overview
- NEXT_PUBLIC_DEMO=true makes the webapp fully standalone: loads DEMO_DOCUMENTS / DEMO_SUBMISSIONS / DEMO_INSIGHTS from demo-data.ts, disables all mutating actions, shows a DEMO badge in the top bar
- apps/webapp/public/demo-cv.docx: static dummy CV (Alex Rivera) for demo
- scripts/gen_demo_cv.py: script to regenerate the demo DOCX
- .env.example: document NEXT_PUBLIC_DEMO flag

https://claude.ai/code/session_01LWxu2qrwY6BRjUFXXn7NiM
2026-07-15 19:03:38 +00:00 · 2026-04-05 09:34:01 +00:00
parent 0f32d46404
commit 615d1bdb9e
12 changed files with 780 additions and 17 deletions
--- a/.env.example
+++ b/.env.example
@@ -56,3 +56,8 @@ AUTHENTIK_CLIENT_SECRET=
 # ── AI tailoring (optional) ───────────────────────────────────────────────────
 # Leave blank to use the built-in rule-based tailoring instead of Claude.
 ANTHROPIC_API_KEY=
 # ── Demo mode ─────────────────────────────────────────────────────────────────
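 # Set to true to run the webapp in standalone demo mode.
 # Demo mode uses hardcoded dummy data — no backend or DB required.
 # NEXT_PUBLIC_* values are inlined into the client bundle at build time,
 # so rebuild the webapp after changing this flag.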
 NEXT_PUBLIC_DEMO=false
--- a/apps/backend/fastapi/app/api/router.py
+++ b/apps/backend/fastapi/app/api/router.py
@@ -2,10 +2,11 @@ from __future__ import annotations
 from fastapi import APIRouter
-from app.api.routes import documents, versions, submissions, public
+from app.api.routes import documents, insights, versions, submissions, public
 # Aggregate router: the router exposed by each route module is registered on it.
 api_router = APIRouter()
 api_router.include_router(documents.router)
 api_router.include_router(versions.router)
 api_router.include_router(submissions.router)
 api_router.include_router(public.router)
 api_router.include_router(insights.router)
--- a/apps/backend/fastapi/app/api/routes/insights.py
+++ b/apps/backend/fastapi/app/api/routes/insights.py
@@ -0,0 +1,41 @@
 from __future__ import annotations
 from fastapi import APIRouter, Depends
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.api.deps import get_current_user, get_db
 from app.schemas.insights import InsightsResponse
 from app.services.insights import get_insights
 from dlib.auth import AuthenticatedUser
 router = APIRouter(prefix="/insights", tags=["insights"])
@router.get("", response_model=InsightsResponse)
async def insights_endpoint(
    session: AsyncSession = Depends(get_db),
    user: AuthenticatedUser = Depends(get_current_user),
):
    # Computes insights for the authenticated caller (owner_id = user.sub) and
    # copies every result row into a plain dict for the InsightsResponse schema.
    # Comments are used instead of a docstring so the generated OpenAPI
    # description of this route stays unchanged.
    result = await get_insights(session, owner_id=user.sub)

    def keyword_rows(signals):
        # The positive and negative keyword lists share one row shape.
        return [
            {
                "keyword": signal.keyword,
                "positive_count": signal.positive_count,
                "negative_count": signal.negative_count,
                "lift": signal.lift,
            }
            for signal in signals
        ]

    operation_rows = []
    for op in result.operation_impact:
        operation_rows.append(
            {"operation": op.operation, "total": op.total, "positive": op.positive, "rate": op.rate}
        )

    positive_rows = keyword_rows(result.top_positive_keywords)
    negative_rows = keyword_rows(result.top_negative_keywords)

    section_rows = []
    for sec in result.section_impact:
        section_rows.append(
            {"section": sec.section, "positive_rate": sec.positive_rate, "count": sec.count}
        )

    return InsightsResponse(
        total_submissions=result.total_submissions,
        positive_count=result.positive_count,
        positive_rate=result.positive_rate,
        operation_impact=operation_rows,
        top_positive_keywords=positive_rows,
        top_negative_keywords=negative_rows,
        section_impact=section_rows,
        has_data=result.has_data,
    )
--- a/apps/backend/fastapi/app/schemas/insights.py
+++ b/apps/backend/fastapi/app/schemas/insights.py
@@ -0,0 +1,34 @@
 from __future__ import annotations
 from pydantic import BaseModel
 # One row of InsightsResponse.operation_impact.
 # (A `#` comment is used instead of a class docstring so the generated JSON
 # schema description is not affected.)
 class OperationImpactSchema(BaseModel):
    operation: str  # operation name as stored on the suggestion, e.g. "replace_text"
    total: int
    positive: int
    rate: float  # presumably positive / total as a 0..1 fraction — confirm against dlib.ai.insights
 # One row of InsightsResponse.top_positive_keywords / top_negative_keywords.
 # (A `#` comment is used instead of a class docstring so the generated JSON
 # schema description is not affected.)
 class KeywordSignalSchema(BaseModel):
    keyword: str
    positive_count: int
    negative_count: int
    lift: float  # NOTE(review): exact lift formula lives in dlib.ai.insights — not visible here
 # One row of InsightsResponse.section_impact.
 # (A `#` comment is used instead of a class docstring so the generated JSON
 # schema description is not affected.)
 class SectionImpactSchema(BaseModel):
    section: str
    positive_rate: float  # rendered by the web app as a percentage (value * 100)
    count: int  # shown in the web app as "<count> edits"
 # Response body of the insights route (declared there as response_model).
 # (A `#` comment is used instead of a class docstring so the generated JSON
 # schema description is not affected.)
 class InsightsResponse(BaseModel):
    total_submissions: int
    positive_count: int
    positive_rate: float  # rendered by the web app as a percentage (value * 100)
    operation_impact: list[OperationImpactSchema]
    top_positive_keywords: list[KeywordSignalSchema]
    top_negative_keywords: list[KeywordSignalSchema]
    section_impact: list[SectionImpactSchema]
    has_data: bool  # when false the web app shows a "not enough data" hint instead of charts
--- a/apps/backend/fastapi/app/services/insights.py
+++ b/apps/backend/fastapi/app/services/insights.py
@@ -0,0 +1,37 @@
 from __future__ import annotations
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import selectinload
 from dlib.ai.insights import InsightsResult, SubmissionRecord, SuggestionRecord, analyze
 from app.models import AiSuggestion, CvDocument, CvVersion, Submission
 async def get_insights(session: AsyncSession, *, owner_id: str) -> InsightsResult:
    """Load the owner's submissions and run the insights analysis on them.

    Selects every ``Submission`` whose version belongs to a ``CvDocument``
    with the given ``owner_id`` (suggestions are eagerly loaded), converts
    the ORM rows into ``SubmissionRecord`` / ``SuggestionRecord`` values and
    returns whatever ``analyze`` produces for them.
    """
    query = (
        select(Submission)
        .join(Submission.version)
        .join(CvVersion.document)
        .where(CvDocument.owner_id == owner_id)
        .options(selectinload(Submission.suggestions))
    )
    result = await session.execute(query)

    records = []
    for submission in result.scalars().all():
        # ``status`` is read through ``.value`` before the suggestions are walked.
        status = submission.status.value
        suggestion_records = []
        for suggestion in submission.suggestions:
            suggestion_records.append(
                SuggestionRecord(
                    operation=suggestion.operation,
                    target_path=suggestion.target_path,
                    proposed_text=suggestion.proposed_text,
                    rationale=suggestion.rationale,
                    accepted=suggestion.accepted,
                )
            )
        records.append(SubmissionRecord(status=status, suggestions=suggestion_records))

    return analyze(records)
--- a/apps/webapp/public/demo-cv.docx
+++ b/apps/webapp/public/demo-cv.docx
--- a/apps/webapp/src/app/dashboard/demo-data.ts
+++ b/apps/webapp/src/app/dashboard/demo-data.ts
@@ -0,0 +1,165 @@
 import type { Document, Submission, InsightsResult } from '@/libs/api';
 // ISO-8601 timestamp for `daysAgo` days before the moment of the call.
 // (The former `NOW` constant was never referenced in this module and has been removed.)
 const D = (daysAgo: number) => new Date(Date.now() - daysAgo * 86_400_000).toISOString();
 // Version ids shared by DEMO_DOCUMENTS and DEMO_SUBMISSIONS.
 const ROOT_VERSION_ID = 'demo-v1';
 const ML_VERSION_ID = 'demo-v2';
 const BACKEND_VERSION_ID = 'demo-v3';
 // Id of the single demo document; the dashboard preselects it in demo mode.
 export const DEMO_DOC_ID = 'demo-doc-1';
 // Hardcoded documents for demo mode: one CV with three versions — the root
 // baseline plus two branches (ml-engineer, backend-engineer) whose parent is the root.
 export const DEMO_DOCUMENTS: Document[] = [
    {
        id: DEMO_DOC_ID,
        title: 'Alex Rivera — Software Engineer',
        description: 'Main CV, ATS-safe baseline',
        owner_id: 'demo-user',
        root_version_id: ROOT_VERSION_ID,
        created_at: D(45),
        updated_at: D(3),
        versions: [
            // root baseline: no patches, no public assets
            {
                id: ROOT_VERSION_ID,
                branch_name: 'root',
                version_label: 'v1.0 baseline',
                parent_version_id: null,
                structured_blocks: [
                    { path: 'heading[1]', block_type: 'heading', text: 'Alex Rivera', keywords: [] },
                    { path: 'summary[1]', block_type: 'summary', text: 'Software engineer with 5 years of experience building distributed systems and ML pipelines at scale.', keywords: ['distributed', 'systems', 'machine', 'learning'] },
                    { path: 'heading[2]', block_type: 'heading', text: 'Experience', keywords: [] },
                    { path: 'bullet[1]', block_type: 'bullet', text: 'Led migration of monolithic data pipeline to distributed microservices, reducing p99 latency by 40%.', keywords: ['distributed', 'microservices', 'latency', 'pipeline'] },
                    { path: 'bullet[2]', block_type: 'bullet', text: 'Designed feature flag system used by 50+ engineers across 3 teams.', keywords: ['system', 'design', 'engineers'] },
                    { path: 'heading[3]', block_type: 'heading', text: 'Skills', keywords: [] },
                    { path: 'skills[1]', block_type: 'skills', text: 'Python, Go, TypeScript, SQL, Kubernetes, AWS, PyTorch', keywords: ['python', 'go', 'typescript', 'pytorch', 'kubernetes'] },
                ],
                artifact_docx_key: 'demo/demo-cv.docx',
                patches: [],
                public_assets: [],
                created_at: D(45),
                updated_at: D(45),
            },
            // ML branch: two patches and the only public asset
            {
                id: ML_VERSION_ID,
                branch_name: 'ml-engineer',
                version_label: 'ML-focused variant',
                parent_version_id: ROOT_VERSION_ID,
                structured_blocks: [
                    { path: 'heading[1]', block_type: 'heading', text: 'Alex Rivera', keywords: [] },
                    { path: 'summary[1]', block_type: 'summary', text: 'ML engineer specialising in large-scale PyTorch training pipelines, distributed inference, and production-grade MLOps.', keywords: ['pytorch', 'distributed', 'mlops', 'inference'] },
                    { path: 'heading[2]', block_type: 'heading', text: 'Experience', keywords: [] },
                    { path: 'bullet[1]', block_type: 'bullet', text: 'Contributed PyTorch anomaly detection model achieving 92% precision on production traffic at 2M events/day.', keywords: ['pytorch', 'machine learning', 'production', 'precision'] },
                    { path: 'bullet[2]', block_type: 'bullet', text: 'Built streaming data ingestion system (Kafka + Flink) powering real-time ML feature store.', keywords: ['kafka', 'flink', 'streaming', 'feature store'] },
                    { path: 'heading[3]', block_type: 'heading', text: 'Skills', keywords: [] },
                    { path: 'skills[1]', block_type: 'skills', text: 'PyTorch, Python, Go, Kubernetes, Spark, dbt, AWS SageMaker', keywords: ['pytorch', 'python', 'kubernetes', 'spark', 'sagemaker'] },
                ],
                artifact_docx_key: 'demo/demo-cv.docx',
                patches: [
                    { id: 'dp1', target_path: 'summary[1]', operation: 'replace_text', old_value: 'Software engineer…', new_value: 'ML engineer specialising…', created_at: D(30) },
                    { id: 'dp2', target_path: 'skills[1]', operation: 'boost_keyword', old_value: null, new_value: 'PyTorch', created_at: D(30) },
                ],
                public_assets: [{
                    id: 'demo-asset-1', slug: 'alex-ml', artifact_key: 'public/alex-ml.docx',
                    is_public: true, url: '/demo-cv.docx', version_id: ML_VERSION_ID, submission_id: null, created_at: D(20),
                }],
                created_at: D(30),
                updated_at: D(3),
            },
            // backend branch: one patch, no section headings in its blocks
            {
                id: BACKEND_VERSION_ID,
                branch_name: 'backend-engineer',
                version_label: 'Backend-focused variant',
                parent_version_id: ROOT_VERSION_ID,
                structured_blocks: [
                    { path: 'heading[1]', block_type: 'heading', text: 'Alex Rivera', keywords: [] },
                    { path: 'summary[1]', block_type: 'summary', text: 'Backend engineer focused on high-throughput API design, distributed systems, and reliability engineering.', keywords: ['backend', 'api', 'distributed', 'reliability'] },
                    { path: 'bullet[1]', block_type: 'bullet', text: 'Led migration to microservices, reducing p99 latency by 40% under 10k RPS sustained load.', keywords: ['microservices', 'latency', 'rps', 'distributed'] },
                    { path: 'skills[1]', block_type: 'skills', text: 'Go, Python, PostgreSQL, Redis, gRPC, Kubernetes, AWS', keywords: ['go', 'postgresql', 'redis', 'grpc', 'kubernetes'] },
                ],
                artifact_docx_key: 'demo/demo-cv.docx',
                patches: [
                    { id: 'dp3', target_path: 'summary[1]', operation: 'replace_text', old_value: 'Software engineer…', new_value: 'Backend engineer…', created_at: D(25) },
                ],
                public_assets: [],
                created_at: D(25),
                updated_at: D(10),
            },
        ],
    },
 ];
 // Hardcoded submissions for demo mode: ds1–ds4 target the ML branch, ds5–ds6 the
 // backend branch. Statuses: three pending_review, one published, two archived.
 // s7 is the only suggestion with accepted: false.
 export const DEMO_SUBMISSIONS: Submission[] = [
    {
        id: 'ds1', version_id: ML_VERSION_ID, company_name: 'Anthropic', role_title: 'ML Research Engineer',
        job_url: null, job_description: null, status: 'pending_review', created_at: D(18),
        suggestions: [
            { id: 's1', target_path: 'summary[1]', operation: 'boost_keyword', proposed_text: 'constitutional ai', rationale: 'Highlight alignment research experience', accepted: true, metadata_json: { confidence: 0.82 } },
            { id: 's2', target_path: 'bullet[1]', operation: 'replace_text', proposed_text: 'Built distributed PyTorch training pipeline handling constitutional AI fine-tuning at scale.', rationale: 'Align with Anthropic stack', accepted: true, metadata_json: { confidence: 0.74 } },
        ],
    },
    {
        id: 'ds2', version_id: ML_VERSION_ID, company_name: 'Google DeepMind', role_title: 'Senior ML Engineer',
        job_url: null, job_description: null, status: 'pending_review', created_at: D(14),
        suggestions: [
            { id: 's3', target_path: 'skills[1]', operation: 'boost_keyword', proposed_text: 'JAX', rationale: 'DeepMind uses JAX heavily', accepted: true, metadata_json: { confidence: 0.71 } },
            { id: 's4', target_path: 'bullet[2]', operation: 'replace_text', proposed_text: 'Built large-scale streaming pipeline underpinning real-time feature store for JAX model serving.', rationale: 'Add JAX context', accepted: true, metadata_json: { confidence: 0.68 } },
        ],
    },
    {
        id: 'ds3', version_id: ML_VERSION_ID, company_name: 'OpenAI', role_title: 'Research Engineer',
        job_url: null, job_description: null, status: 'published', created_at: D(10),
        suggestions: [
            { id: 's5', target_path: 'summary[1]', operation: 'replace_text', proposed_text: 'ML engineer with track record in large-scale training infrastructure and RLHF pipelines.', rationale: 'OpenAI focus on RLHF', accepted: true, metadata_json: { confidence: 0.77 } },
        ],
    },
    {
        id: 'ds4', version_id: ML_VERSION_ID, company_name: 'Meta AI', role_title: 'ML Infrastructure Engineer',
        job_url: null, job_description: null, status: 'archived', created_at: D(22),
        suggestions: [
            { id: 's6', target_path: 'bullet[1]', operation: 'boost_keyword', proposed_text: 'PyTorch', rationale: 'Meta maintains PyTorch', accepted: true, metadata_json: { confidence: 0.55 } },
            { id: 's7', target_path: 'summary[1]', operation: 'suppress_block', proposed_text: null, rationale: 'Summary too generic', accepted: false, metadata_json: { confidence: 0.3 } },
        ],
    },
    {
        id: 'ds5', version_id: BACKEND_VERSION_ID, company_name: 'Stripe', role_title: 'Senior Backend Engineer',
        job_url: null, job_description: null, status: 'pending_review', created_at: D(8),
        suggestions: [
            { id: 's8', target_path: 'bullet[1]', operation: 'replace_text', proposed_text: 'Led migration to microservices achieving 99.99% uptime across Stripe-scale payment processing.', rationale: 'Emphasise reliability', accepted: true, metadata_json: { confidence: 0.79 } },
        ],
    },
    {
        id: 'ds6', version_id: BACKEND_VERSION_ID, company_name: 'Cloudflare', role_title: 'Staff Engineer',
        job_url: null, job_description: null, status: 'archived', created_at: D(20),
        suggestions: [
            { id: 's9', target_path: 'skills[1]', operation: 'boost_keyword', proposed_text: 'Rust', rationale: 'Cloudflare uses Rust', accepted: true, metadata_json: { confidence: 0.4 } },
        ],
    },
 ];
 // Hardcoded insights shown in demo mode instead of a backend response.
 // NOTE(review): the headline numbers agree with DEMO_SUBMISSIONS (6 submissions,
 // 4 with status pending_review/published), but the per-operation, keyword and
 // section figures are illustrative and not derived from it — e.g. the operation
 // totals add up to 11 while DEMO_SUBMISSIONS holds 9 suggestions. Recompute them
 // if the demo is expected to be self-consistent.
 export const DEMO_INSIGHTS: InsightsResult = {
    total_submissions: 6,
    positive_count: 4,
    positive_rate: 0.667,
    has_data: true,
    operation_impact: [
        { operation: 'replace_text', total: 5, positive: 4, rate: 0.8 },
        { operation: 'boost_keyword', total: 5, positive: 3, rate: 0.6 },
        { operation: 'suppress_block', total: 1, positive: 0, rate: 0.0 },
    ],
    top_positive_keywords: [
        { keyword: 'pytorch', positive_count: 4, negative_count: 1, lift: 4.0 },
        { keyword: 'distributed', positive_count: 3, negative_count: 0, lift: 3.0 },
        { keyword: 'pipeline', positive_count: 3, negative_count: 1, lift: 3.0 },
        { keyword: 'scale', positive_count: 3, negative_count: 1, lift: 3.0 },
        { keyword: 'reliability', positive_count: 2, negative_count: 0, lift: 2.0 },
        { keyword: 'inference', positive_count: 2, negative_count: 0, lift: 2.0 },
    ],
    top_negative_keywords: [
        { keyword: 'generic', positive_count: 0, negative_count: 2, lift: 0.0 },
        { keyword: 'suppress', positive_count: 0, negative_count: 1, lift: 0.0 },
    ],
    section_impact: [
        { section: 'summary', positive_rate: 0.83, count: 6 },
        { section: 'bullet', positive_rate: 0.75, count: 4 },
        { section: 'skills', positive_rate: 0.5, count: 4 },
    ],
 };
--- a/apps/webapp/src/app/dashboard/page.tsx
+++ b/apps/webapp/src/app/dashboard/page.tsx
@@ -3,12 +3,15 @@
 import { useEffect, useRef, useState } from 'react';
 import CVTree from '@/components/cv/CVTree';
 import DiffViewer from '@/components/cv/DiffViewer';
 import InsightsPanel from '@/components/cv/InsightsPanel';
 import Link from 'next/link';
 import {
    appendPatches,
    createBranch, createSubmission, deleteDocument, deleteVersion,
    Document, downloadVersionUrl,
-    fetchDocuments, fetchSubmissions, fetchPublicAssetAnalytics, getPublicPdfUrl,
+    fetchDocuments, fetchInsights, fetchSubmissions, fetchPublicAssetAnalytics, getPublicPdfUrl,
    InsightsResult,
    IS_DEMO,
    publishVersion, PublicAsset, PublicAssetAnalytics,
    requestAiSuggestions,
    Submission,
@@ -20,6 +23,9 @@ import {
    uploadDocument,
    Version,
 } from '@/libs/api';
 import {
    DEMO_DOCUMENTS, DEMO_DOC_ID, DEMO_INSIGHTS, DEMO_SUBMISSIONS,
 } from './demo-data';
 // ── helpers ───────────────────────────────────────────────────────────────────
@@ -548,7 +554,7 @@ function SubmissionsTab({
 // ── main dashboard ────────────────────────────────────────────────────────────
 type Modal = 'upload' | 'branch' | 'submission' | 'publish' | null;
-type Tab = 'content' | 'patches' | 'submissions';
+type Tab = 'content' | 'patches' | 'submissions' | 'insights';
 export default function Dashboard() {
    const [docs, setDocs] = useState<Document[]>([]);
@@ -568,8 +574,17 @@ export default function Dashboard() {
    const [docHovered, setDocHovered] = useState<string | null>(null);
    const [applyLoading, setApplyLoading] = useState(false);
    const [applyError, setApplyError] = useState('');
    const [insights, setInsights] = useState<InsightsResult | null>(null);
    useEffect(() => {
        if (IS_DEMO) {
            setDocs(DEMO_DOCUMENTS);
            setAllSubmissions(DEMO_SUBMISSIONS);
            setSelectedDocId(DEMO_DOC_ID);
            setInsights(DEMO_INSIGHTS);
            setLoading(false);
            return;
        }
        Promise.all([fetchDocuments(), fetchSubmissions().catch(() => [])])
            .then(([d, allSubs]) => {
                setDocs(d);
@@ -580,6 +595,11 @@ export default function Dashboard() {
            .finally(() => setLoading(false));
    }, []);
    useEffect(() => {
        if (IS_DEMO || !selectedDocId) return;
        fetchInsights().then(setInsights).catch(() => setInsights(null));
    }, [selectedDocId]);
    useEffect(() => {
        setPendingEdits(new Map());
        setApplyError('');
@@ -691,6 +711,7 @@ export default function Dashboard() {
    };
    const handleDeleteDoc = async (docId: string) => {
        if (IS_DEMO) return;
        if (!confirm('Delete this CV and all its branches? This cannot be undone.')) return;
        try {
            await deleteDocument(docId);
@@ -706,6 +727,7 @@ export default function Dashboard() {
    };
    const handleDeleteVersion = async (versionId: string) => {
        if (IS_DEMO) return;
        const hasChildren = selectedDoc?.versions.some(v => v.parent_version_id === versionId);
        const msg = hasChildren
            ? 'Delete this branch and all its sub-branches? This cannot be undone.'
@@ -758,12 +780,21 @@ export default function Dashboard() {
                    </Link>
                </div>
                <div style={{ display: 'flex', gap: 8, alignItems: 'center' }}>
                    {IS_DEMO && (
                        <span style={{ fontSize: 11, padding: '2px 10px', background: '#7c3aed', color: '#fff', borderRadius: 9999, fontWeight: 600, letterSpacing: '0.04em' }}>
                            DEMO
                        </span>
                    )}
                    {!IS_DEMO && (
                        <button className="btn btn-primary" style={{ padding: '4px 10px', fontSize: 12 }} onClick={() => setModal('upload')}>
                            + Upload CV
                        </button>
                    )}
                    {!IS_DEMO && (
                        <button className="btn btn-ghost" style={{ padding: '4px 10px', fontSize: 12 }} onClick={logout}>
                            Sign out
                        </button>
                    )}
                </div>
            </div>
@@ -900,6 +931,13 @@ export default function Dashboard() {
                                            onSelect={selectVersion}
                                        />
                                    </div>
                                    {insights?.has_data && (
                                        <div style={{ marginTop: 10 }}>
                                            <div className="label" style={{ marginBottom: 8 }}>NLP insights</div>
                                            <InsightsPanel data={insights} />
                                        </div>
                                    )}
                                </div>
                            ) : (
                                <div style={{ paddingTop: 60, textAlign: 'center', color: 'var(--text-faint)', fontSize: 13 }}>
@@ -938,10 +976,15 @@ export default function Dashboard() {
                                    {/* action buttons */}
                                    <div className="action-buttons">
-                                        <button className="btn btn-ghost" onClick={() => setModal('branch')}>Branch</button>
+                                        {!IS_DEMO && <button className="btn btn-ghost" onClick={() => setModal('branch')}>Branch</button>}
-                                        <button className="btn btn-ghost" onClick={() => { setModal('submission'); }}>Submit</button>
+                                        {!IS_DEMO && <button className="btn btn-ghost" onClick={() => { setModal('submission'); }}>Submit</button>}
-                                        <button className="btn btn-ghost" onClick={() => setModal('publish')}>Publish</button>
+                                        {!IS_DEMO && <button className="btn btn-ghost" onClick={() => setModal('publish')}>Publish</button>}
-                                        {selectedVersion.artifact_docx_key && selectedDoc && (
+                                        {IS_DEMO && (
                                            <a href="/demo-cv.docx" download="alex-rivera-cv.docx" className="btn btn-ghost">
                                                ↓ DOCX
                                            </a>
                                        )}
                                        {!IS_DEMO && selectedVersion.artifact_docx_key && selectedDoc && (
                                            <a href={downloadVersionUrl(selectedDoc.id, selectedVersion.id)} download className="btn btn-ghost">
                                                ↓ DOCX
                                            </a>
@@ -1044,7 +1087,7 @@ export default function Dashboard() {
                                {/* tabs */}
                                <div style={{ display: 'flex', gap: 0, borderBottom: '1px solid var(--border)', overflowX: 'auto' }}>
-                                    {(['content', 'patches', 'submissions'] as Tab[]).map(t => (
+                                    {(['content', 'patches', 'submissions', 'insights'] as Tab[]).map(t => (
                                        <button
                                            key={t}
                                            onClick={() => setActiveTab(t)}
@@ -1076,13 +1119,21 @@ export default function Dashboard() {
                                )}
                                {activeTab === 'submissions' && (
                                    <SubmissionsTab
-                                        submissions={submissions}
+                                        submissions={IS_DEMO
                                            ? DEMO_SUBMISSIONS.filter(s => {
                                                const doc = DEMO_DOCUMENTS.find(d => d.id === selectedDocId);
                                                return doc?.versions.some(v => v.id === s.version_id);
                                            })
                                            : submissions}
                                        loading={subsLoading}
-                                        onNewSubmission={() => setModal('submission')}
+                                        onNewSubmission={() => !IS_DEMO && setModal('submission')}
-                                        onRefresh={refreshSubs}
+                                        onRefresh={() => !IS_DEMO && refreshSubs()}
                                        onStatusChange={handleSubmissionStatusChange}
                                    />
                                )}
                                {activeTab === 'insights' && (
                                    <InsightsPanel data={insights} />
                                )}
                            </div>
                        </>
                    )}
--- a/apps/webapp/src/components/cv/InsightsPanel.tsx
+++ b/apps/webapp/src/components/cv/InsightsPanel.tsx
@@ -0,0 +1,134 @@
 'use client';
 import type { InsightsResult } from '@/libs/api';
 /** Horizontal progress bar whose fill width is `rate` expressed as a rounded percentage. */
 function Bar({ rate, positive }: { rate: number; positive?: boolean }) {
    // An explicit `positive={false}` forces red; otherwise the colour follows the rate bands.
    let fill = '#94a3b8';
    if (positive === false) {
        fill = '#ef4444';
    } else if (rate >= 0.6) {
        fill = '#22c55e';
    } else if (rate >= 0.4) {
        fill = '#f59e0b';
    }
    const fillWidth = `${Math.round(rate * 100)}%`;

    return (
        <div style={{ flex: 1, height: 6, background: 'var(--border)', borderRadius: 3, overflow: 'hidden' }}>
            <div
                style={{
                    width: fillWidth,
                    height: '100%',
                    background: fill,
                    borderRadius: 3,
                    transition: 'width 0.3s',
                }}
            />
        </div>
    );
 }
 /** Rounded percentage label for a 0..1 fraction, tinted by the same 0.6 / 0.4 bands as the bars. */
 function Pct({ v }: { v: number }) {
    let color = '#6b7280';
    if (v >= 0.6) {
        color = '#16a34a';
    } else if (v >= 0.4) {
        color = '#d97706';
    }
    const pct = Math.round(v * 100);
    return (
        <span style={{ fontVariantNumeric: 'tabular-nums', fontSize: 12, fontWeight: 600, color: color }}>{pct}%</span>
    );
 }
 /**
  * Insights summary: headline counters, per-operation and per-section bars,
  * and the positive / negative keyword columns.
  *
  * - `data === null` renders a loading placeholder.
  * - `data.has_data === false` renders a "not enough data" hint instead of charts.
  * - Each keyword column falls back to "None yet" when its list is empty, so a
  *   one-sided result no longer leaves the positive column blank.
  */
 export default function InsightsPanel({ data }: { data: InsightsResult | null }) {
    if (!data) return (
        <div style={{ padding: '24px 0', color: 'var(--text-faint)', fontSize: 13, textAlign: 'center' }}>
            Loading insights…
        </div>
    );
    if (!data.has_data) return (
        <div style={{ padding: '24px 0', color: 'var(--text-faint)', fontSize: 13 }}>
            Not enough data yet. Submit applications and mark outcomes to unlock insights.
        </div>
    );
    return (
        <div style={{ display: 'flex', flexDirection: 'column', gap: 20 }}>
            {/* headline numbers */}
            <div style={{ display: 'grid', gridTemplateColumns: 'repeat(3, 1fr)', gap: 8 }}>
                {[
                    { label: 'Total submissions', value: data.total_submissions },
                    { label: 'Passed screening', value: data.positive_count },
                    { label: 'Screening rate', value: `${Math.round(data.positive_rate * 100)}%` },
                ].map(({ label, value }) => (
                    <div key={label} style={{ border: '1px solid var(--border)', borderRadius: 6, padding: '8px 10px', background: 'var(--surface)' }}>
                        <div className="label" style={{ marginBottom: 3 }}>{label}</div>
                        <div style={{ fontSize: 18, fontWeight: 600 }}>{value}</div>
                    </div>
                ))}
            </div>
            {/* operation impact */}
            {data.operation_impact.length > 0 && (
                <section>
                    <div className="label" style={{ marginBottom: 8 }}>Patch operation impact</div>
                    <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
                        {data.operation_impact.map(op => (
                            <div key={op.operation} style={{ display: 'flex', alignItems: 'center', gap: 10 }}>
                                <span style={{ fontFamily: 'var(--font-mono)', fontSize: 11, width: 140, flexShrink: 0, color: 'var(--text-muted)' }}>
                                    {op.operation}
                                </span>
                                <Bar rate={op.rate} />
                                <Pct v={op.rate} />
                                <span style={{ fontSize: 11, color: 'var(--text-faint)', width: 50, textAlign: 'right' }}>
                                    {op.positive}/{op.total}
                                </span>
                            </div>
                        ))}
                    </div>
                    <p style={{ fontSize: 11, color: 'var(--text-faint)', marginTop: 6 }}>
                        % of accepted patches of this type in submissions that passed screening.
                    </p>
                </section>
            )}
            {/* section impact */}
            {data.section_impact.length > 0 && (
                <section>
                    <div className="label" style={{ marginBottom: 8 }}>CV section impact</div>
                    <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
                        {data.section_impact.map(s => (
                            <div key={s.section} style={{ display: 'flex', alignItems: 'center', gap: 10 }}>
                                <span style={{ fontFamily: 'var(--font-mono)', fontSize: 11, width: 80, flexShrink: 0, color: 'var(--text-muted)' }}>
                                    {s.section}
                                </span>
                                <Bar rate={s.positive_rate} />
                                <Pct v={s.positive_rate} />
                                <span style={{ fontSize: 11, color: 'var(--text-faint)', width: 50, textAlign: 'right' }}>
                                    {s.count} edits
                                </span>
                            </div>
                        ))}
                    </div>
                </section>
            )}
            {/* keyword signals — the section is shown when either list has entries */}
            {(data.top_positive_keywords.length > 0 || data.top_negative_keywords.length > 0) && (
                <section>
                    <div className="label" style={{ marginBottom: 8 }}>Keyword signals</div>
                    <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}>
                        <div>
                            <div style={{ fontSize: 11, color: '#16a34a', fontWeight: 600, marginBottom: 6 }}>Positive signals</div>
                            <div style={{ display: 'flex', flexDirection: 'column', gap: 4 }}>
                                {/* same empty-state fallback as the negative column */}
                                {data.top_positive_keywords.length === 0
                                    ? <span style={{ fontSize: 12, color: 'var(--text-faint)' }}>None yet</span>
                                    : data.top_positive_keywords.map(k => (
                                        <div key={k.keyword} style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
                                            <span style={{ fontSize: 12, fontFamily: 'var(--font-mono)', color: 'var(--text)' }}>{k.keyword}</span>
                                            <span style={{ fontSize: 11, color: '#16a34a' }}>+{k.positive_count} ({k.lift}×)</span>
                                        </div>
                                    ))}
                            </div>
                        </div>
                        <div>
                            <div style={{ fontSize: 11, color: '#dc2626', fontWeight: 600, marginBottom: 6 }}>Negative signals</div>
                            <div style={{ display: 'flex', flexDirection: 'column', gap: 4 }}>
                                {data.top_negative_keywords.length === 0
                                    ? <span style={{ fontSize: 12, color: 'var(--text-faint)' }}>None yet</span>
                                    : data.top_negative_keywords.map(k => (
                                        <div key={k.keyword} style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
                                            <span style={{ fontSize: 12, fontFamily: 'var(--font-mono)', color: 'var(--text)' }}>{k.keyword}</span>
                                            <span style={{ fontSize: 11, color: '#dc2626' }}>{k.negative_count}×</span>
                                        </div>
                                    ))}
                            </div>
                        </div>
                    </div>
                    <p style={{ fontSize: 11, color: 'var(--text-faint)', marginTop: 6 }}>
                        Keywords extracted from accepted AI suggestions, split by outcome.
                    </p>
                </section>
            )}
        </div>
    );
 }
--- a/apps/webapp/src/libs/api.ts
+++ b/apps/webapp/src/libs/api.ts
@@ -1,4 +1,5 @@
 // Prefix for API request URLs (e.g. `${API}/api/v1/versions/...`); it is empty,
 // so those URLs are relative to the current origin.
 const API = "";
 // Demo-mode switch: true only when NEXT_PUBLIC_DEMO is exactly the string 'true'.
 export const IS_DEMO = process.env.NEXT_PUBLIC_DEMO === 'true';
 export type StructuredBlock = {
    path: string;
@@ -87,6 +88,21 @@ export type PublicAssetAnalytics = {
    last_viewed_at?: string | null;
 };
 // Shapes returned by fetchInsights() from GET /api/v1/insights.
 // Field names match the dataclasses in dlib/ai/insights.py.
 // Per-operation stats over accepted suggestions: `positive` of `total` belonged
 // to a positive-outcome submission; `rate` is positive / total.
 export type OperationImpact = { operation: string; total: number; positive: number; rate: number };
 // Keyword occurrence counts split by outcome; `lift` is
 // positive_count / max(negative_count, 1).
 export type KeywordSignal = { keyword: string; positive_count: number; negative_count: number; lift: number };
 // Per-section stats; `section` is the leading name of a suggestion's target path
 // (e.g. "heading", "bullet", "table") and `count` the number of accepted suggestions.
 export type SectionImpact = { section: string; positive_rate: number; count: number };
 // Top-level insights payload.
 export type InsightsResult = {
    // Number of submissions analysed, including ones without an outcome yet.
    total_submissions: number;
    // Submissions with a positive outcome (pending_review / published).
    positive_count: number;
    // positive_count / total_submissions.
    positive_rate: number;
    operation_impact: OperationImpact[];
    top_positive_keywords: KeywordSignal[];
    top_negative_keywords: KeywordSignal[];
    section_impact: SectionImpact[];
    // False when no submission has a positive or negative outcome.
    has_data: boolean;
 };
 // reads OIDC bearer token from client-readable cookie (set by /api/auth/callback)
 function getAuthHeader(): Record<string, string> {
    if (typeof document === 'undefined') return {};
@@ -238,6 +254,9 @@ export async function deleteDocument(documentId: string): Promise<void> {
    }
 }
 // Fetch the aggregated suggestion insights from GET /api/v1/insights.
 export function fetchInsights(): Promise<InsightsResult> {
    return req<InsightsResult>('/api/v1/insights');
 }
 export async function deleteVersion(versionId: string): Promise<void> {
    const res = await fetch(`${API}/api/v1/versions/${versionId}`, {
        method: 'DELETE',
--- a/dlib/ai/insights.py
+++ b/dlib/ai/insights.py
@@ -0,0 +1,189 @@
 from __future__ import annotations
 import re
 from collections import Counter, defaultdict
 from dataclasses import dataclass, field
 from typing import Literal
 # Common English function words that _tokens() drops before keyword counting.
 # The literal repeats "with"; the frozenset collapses the duplicate.
 STOPWORDS = frozenset(
    "a an the and or but in on at to for of with is are was were be been have has"
    " had do does did this that these those it its i you he she we they their our"
    " your my his her from by into through about as so if then when where which who"
    " can will may should would could also just not no more some all any each"
    " than other up out off over how what new using use used with well per".split()
 )
 # Outcome label derived from a submission status by _outcome(); "negative" = archived.
 Outcome = Literal["positive", "negative"]  # positive = pending_review / published
@dataclass
 class SuggestionRecord:
    """One AI suggestion attached to a submission, as consumed by ``analyze``."""
    # Operation name; grouping key for the per-operation impact stats.
    operation: str
    # Path of the edited block (e.g. "heading[1]"); its leading name is the section.
    target_path: str
    # Suggested replacement text; tokenized for keyword signals when present.
    proposed_text: str | None
    # Explanation attached to the suggestion; tokenized alongside proposed_text.
    rationale: str | None
    # Only suggestions with ``accepted is True`` are analysed.
    # NOTE(review): None presumably means "not decided yet" — confirm with the caller.
    accepted: bool | None
@dataclass
 class SubmissionRecord:
    """A submission together with the AI suggestions made for it."""
    # Workflow status; _outcome() maps it to "positive", "negative" or None.
    status: str
    # Suggestions belonging to this submission (empty by default).
    suggestions: list[SuggestionRecord] = field(default_factory=list)
@dataclass
 class OperationImpact:
    """Outcome statistics for one suggestion operation."""
    operation: str
    # Accepted suggestions with this operation in outcome-labeled submissions.
    total: int
    # Subset of ``total`` that belongs to positive-outcome submissions.
    positive: int
    # positive / total, rounded to 3 decimals by analyze().
    rate: float
@dataclass
 class KeywordSignal:
    """Occurrence counts of one keyword, split by submission outcome."""
    keyword: str
    # Token occurrences in accepted suggestions of positive-outcome submissions.
    positive_count: int
    # Token occurrences in accepted suggestions of negative-outcome submissions.
    negative_count: int
    lift: float  # positive_count / max(negative_count, 1)
@dataclass
 class SectionImpact:
    """Outcome statistics for one document section type."""
    # Leading name of the suggestions' target paths (see _section_prefix).
    section: str
    # Share of this section's accepted suggestions in positive-outcome
    # submissions, rounded to 3 decimals by analyze().
    positive_rate: float
    # Accepted suggestions targeting this section in outcome-labeled submissions.
    count: int
@dataclass
 class InsightsResult:
    """Aggregate result returned by ``analyze``."""
    # All submissions passed in, including those without an outcome.
    total_submissions: int
    # Submissions whose status maps to the "positive" outcome.
    positive_count: int
    # positive_count / total_submissions (0.0 when there are no submissions).
    positive_rate: float
    # Sorted by rate, highest first.
    operation_impact: list[OperationImpact]
    # At most 8 entries each.
    top_positive_keywords: list[KeywordSignal]
    top_negative_keywords: list[KeywordSignal]
    # Sorted by positive_rate, highest first.
    section_impact: list[SectionImpact]
    # True when at least one submission has a positive or negative outcome.
    has_data: bool
 def _outcome(status: str) -> Outcome | None:
    """Translate a submission status into an outcome label.

    ``pending_review`` and ``published`` count as ``"positive"`` and
    ``archived`` as ``"negative"``.  Every other status (draft / tailoring)
    does not carry enough signal and yields ``None``.
    """
    label_by_status = {
        "pending_review": "positive",
        "published": "positive",
        "archived": "negative",
    }
    return label_by_status.get(status)
 def _tokens(text: str | None) -> list[str]:
    """Split free text into lower-cased keyword tokens.

    A token starts with a letter and may continue with letters, digits, ``+``,
    ``.`` or ``-`` so that terms such as ``c++``, ``node.js`` and
    ``scikit-learn`` stay in one piece.  Trailing ``.`` and ``-`` are stripped
    so that a word ending a sentence ("kafka.") is counted under the same key
    as the bare word, and so that a stopword followed by punctuation is still
    filtered out.

    Args:
        text: Raw text to tokenize; ``None`` or ``""`` yields an empty list.

    Returns:
        Tokens in order of appearance, without stopwords and without tokens of
        two characters or fewer.
    """
    if not text:
        return []
    # The character class admits "." and "-" inside a token, so a raw match can
    # also swallow punctuation that merely follows the word; trim it first.
    trimmed = (raw.rstrip(".-") for raw in re.findall(r"[a-z][a-z0-9+.-]{1,}", text.lower()))
    return [t for t in trimmed if len(t) > 2 and t not in STOPWORDS]
 def _section_prefix(path: str) -> str:
    """Return the leading block-type name of a target path.

    heading[1] -> heading, bullet[3] -> bullet, table[1].0-1 -> table

    An empty path, or one that does not start with a lowercase letter or
    underscore (e.g. ``"[3]"``), yields ``"unknown"`` instead of raising.
    """
    # re.match returns None when the path has no leading name; guard before .group().
    found = re.match(r"[a-z_]+", path) if path else None
    return found.group(0) if found else "unknown"
 def analyze(submissions: list[SubmissionRecord]) -> InsightsResult:
    """Correlate accepted AI suggestions with submission outcomes.

    Only submissions whose status maps to an outcome (see ``_outcome``) and only
    suggestions with ``accepted is True`` contribute to the per-operation,
    per-section and keyword statistics.

    Args:
        submissions: Every submission to consider, with or without an outcome.

    Returns:
        An ``InsightsResult``.  ``total_submissions`` and the denominator of
        ``positive_rate`` count all submissions, including those without an
        outcome; ``has_data`` is true when at least one submission has one.
        Keyword lists hold at most 8 entries and are ordered deterministically:
        ties on the ranking values are broken alphabetically by keyword.
    """
    labeled = [(s, _outcome(s.status)) for s in submissions]
    labeled_known = [(s, o) for s, o in labeled if o is not None]
    positive_count = sum(1 for _, o in labeled_known if o == "positive")
    op_total: Counter[str] = Counter()
    op_positive: Counter[str] = Counter()
    sec_total: Counter[str] = Counter()
    sec_pos: Counter[str] = Counter()
    kw_pos: Counter[str] = Counter()
    kw_neg: Counter[str] = Counter()
    # Single pass over the accepted suggestions of outcome-labeled submissions.
    for sub, outcome in labeled_known:
        is_positive = outcome == "positive"
        kw_bucket = kw_pos if is_positive else kw_neg
        for sug in sub.suggestions:
            if sug.accepted is not True:
                continue
            sec = _section_prefix(sug.target_path)
            op_total[sug.operation] += 1
            sec_total[sec] += 1
            if is_positive:
                op_positive[sug.operation] += 1
                sec_pos[sec] += 1
            kw_bucket.update(_tokens(sug.proposed_text))
            kw_bucket.update(_tokens(sug.rationale))
    # Operation impact, best rate first (ties keep first-seen order).
    op_impact = sorted(
        [
            OperationImpact(
                operation=op,
                total=total,
                positive=op_positive[op],
                rate=round(op_positive[op] / total, 3),
            )
            for op, total in op_total.items()
        ],
        key=lambda x: x.rate,
        reverse=True,
    )
    signals = [
        KeywordSignal(
            keyword=kw,
            positive_count=kw_pos[kw],
            negative_count=kw_neg[kw],
            lift=round(kw_pos[kw] / max(kw_neg[kw], 1), 2),
        )
        for kw in set(kw_pos) | set(kw_neg)
        if kw_pos[kw] + kw_neg[kw] >= 2  # minimum support
    ]
    # ``signals`` comes from iterating a set of strings, whose order changes
    # between interpreter runs.  The keyword is therefore the last sort key so
    # that ties, and with them the top-8 cut, are reproducible.
    top_pos_kw = sorted(
        [s for s in signals if s.positive_count > 0],
        key=lambda s: (-s.lift, -s.positive_count, s.keyword),
    )[:8]
    top_neg_kw = sorted(
        [s for s in signals if s.negative_count > 0],
        key=lambda s: (-s.negative_count, s.lift, s.keyword),
    )[:8]
    # Section impact, best positive rate first (ties keep first-seen order).
    section_impact = sorted(
        [
            SectionImpact(
                section=sec,
                positive_rate=round(sec_pos[sec] / total, 3),
                count=total,
            )
            for sec, total in sec_total.items()
        ],
        key=lambda s: s.positive_rate,
        reverse=True,
    )
    return InsightsResult(
        total_submissions=len(submissions),
        positive_count=positive_count,
        positive_rate=round(positive_count / len(submissions), 3) if submissions else 0.0,
        operation_impact=op_impact,
        top_positive_keywords=top_pos_kw,
        top_negative_keywords=top_neg_kw,
        section_impact=section_impact,
        has_data=bool(labeled_known),
    )
--- a/scripts/gen_demo_cv.py
+++ b/scripts/gen_demo_cv.py
@@ -0,0 +1,87 @@
 """Generate the static demo CV DOCX used by DEMO mode in the webapp."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 from docx import Document
 from docx.shared import Pt, RGBColor
 from docx.enum.text import WD_ALIGN_PARAGRAPH
 def add_heading(doc: Document, text: str, level: int = 1) -> None:
    """Append a heading with the given text and level to *doc*, left-aligned."""
    p = doc.add_heading(text, level=level)
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
 def add_bullet(doc: Document, text: str) -> None:
    """Append *text* to *doc* as a paragraph with the "List Bullet" style."""
    doc.add_paragraph(text, style="List Bullet")
 def build(path: Path) -> None:
    """Build the demo CV for the fictional "Alex Rivera" and save it to *path*.

    The document is assembled top to bottom: centred name and contact line,
    then Summary, Experience, Education and Skills sections.  A confirmation
    line is printed after saving.
    """
    doc = Document()
    # Name / contact
    name_para = doc.add_paragraph()
    name_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = name_para.add_run("Alex Rivera")
    run.bold = True
    run.font.size = Pt(18)
    contact = doc.add_paragraph()
    contact.alignment = WD_ALIGN_PARAGRAPH.CENTER
    contact.add_run("alex.rivera@email.com  ·  linkedin.com/in/alexrivera  ·  github.com/alexrivera")
    doc.add_paragraph()  # spacer
    # Summary
    add_heading(doc, "Summary", level=2)
    doc.add_paragraph(
        "Software engineer with 5 years of experience building distributed systems and "
        "machine learning pipelines at scale. Strong background in Python, Go, and cloud-native "
        "architectures. Passionate about developer tooling and open-source contribution."
    )
    # Experience: each role is a level-3 heading, a date/location line, then bullets.
    add_heading(doc, "Experience", level=2)
    add_heading(doc, "Senior Software Engineer — Acme Corp", level=3)
    doc.add_paragraph("Jan 2022 – Present  ·  San Francisco, CA")
    add_bullet(doc, "Led migration of monolithic data pipeline to distributed microservices, reducing p99 latency by 40%.")
    add_bullet(doc, "Designed and shipped an internal feature flag system used by 50+ engineers across 3 teams.")
    add_bullet(doc, "Mentored 4 junior engineers and ran weekly technical design review sessions.")
    add_heading(doc, "Software Engineer — DataFlow Inc", level=3)
    doc.add_paragraph("Aug 2019 – Dec 2021  ·  Remote")
    add_bullet(doc, "Built real-time streaming ingestion system processing 2M events/day using Kafka and Flink.")
    add_bullet(doc, "Developed Python SDK for internal data platform, adopted by 8 product teams.")
    add_bullet(doc, "Contributed PyTorch-based anomaly detection model achieving 92% precision on production traffic.")
    # Education
    add_heading(doc, "Education", level=2)
    add_heading(doc, "B.S. Computer Science — State University", level=3)
    doc.add_paragraph("Graduated May 2019  ·  GPA 3.8 / 4.0")
    add_bullet(doc, "Senior thesis: Efficient approximate nearest-neighbour search for high-dimensional embeddings.")
    # Skills: one paragraph per category, with a bold label run followed by the list.
    add_heading(doc, "Skills", level=2)
    skills_para = doc.add_paragraph()
    skills_para.add_run("Languages: ").bold = True
    skills_para.add_run("Python, Go, TypeScript, SQL")
    infra_para = doc.add_paragraph()
    infra_para.add_run("Infrastructure: ").bold = True
    infra_para.add_run("Kubernetes, AWS, GCP, Terraform, Docker")
    ml_para = doc.add_paragraph()
    ml_para.add_run("ML / Data: ").bold = True
    ml_para.add_run("PyTorch, scikit-learn, Spark, Kafka, dbt")
    doc.save(path)
    print(f"Saved demo CV to {path}")
 if __name__ == "__main__":
    # An optional first CLI argument overrides the default output location.
    out = Path(sys.argv[1] if len(sys.argv) > 1 else "apps/webapp/public/demo-cv.docx")
    # Make sure the destination directory exists before saving.
    out.parent.mkdir(parents=True, exist_ok=True)
    build(out)