diff --git a/.env.example b/.env.example index 48b9b6d..772b570 100644 --- a/.env.example +++ b/.env.example @@ -56,3 +56,8 @@ AUTHENTIK_CLIENT_SECRET= # ── AI tailoring (optional) ─────────────────────────────────────────────────── # Leave blank to use the built-in rule-based tailoring instead of Claude. ANTHROPIC_API_KEY= + +# ── Demo mode ───────────────────────────────────────────────────────────────── +# Set to true to enable standalone demo mode in the webapp. +# Demo mode uses hardcoded dummy data — no backend or DB required. +NEXT_PUBLIC_DEMO=false diff --git a/apps/backend/fastapi/app/api/router.py b/apps/backend/fastapi/app/api/router.py index dccc0ca..25ab068 100644 --- a/apps/backend/fastapi/app/api/router.py +++ b/apps/backend/fastapi/app/api/router.py @@ -2,10 +2,11 @@ from __future__ import annotations from fastapi import APIRouter -from app.api.routes import documents, versions, submissions, public +from app.api.routes import documents, insights, versions, submissions, public api_router = APIRouter() api_router.include_router(documents.router) api_router.include_router(versions.router) api_router.include_router(submissions.router) api_router.include_router(public.router) +api_router.include_router(insights.router) diff --git a/apps/backend/fastapi/app/api/routes/insights.py b/apps/backend/fastapi/app/api/routes/insights.py new file mode 100644 index 0000000..5cdae76 --- /dev/null +++ b/apps/backend/fastapi/app/api/routes/insights.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from fastapi import APIRouter, Depends +from sqlalchemy.ext.asyncio import AsyncSession + +from app.api.deps import get_current_user, get_db +from app.schemas.insights import InsightsResponse +from app.services.insights import get_insights +from dlib.auth import AuthenticatedUser + +router = APIRouter(prefix="/insights", tags=["insights"]) + + +@router.get("", response_model=InsightsResponse) +async def insights_endpoint( + session: AsyncSession = Depends(get_db), + user: AuthenticatedUser = Depends(get_current_user), +): + result = await get_insights(session, owner_id=user.sub) + return InsightsResponse( + total_submissions=result.total_submissions, + positive_count=result.positive_count, + positive_rate=result.positive_rate, + operation_impact=[ + {"operation": o.operation, "total": o.total, "positive": o.positive, "rate": o.rate} + for o in result.operation_impact + ], + top_positive_keywords=[ + {"keyword": k.keyword, "positive_count": k.positive_count, "negative_count": k.negative_count, "lift": k.lift} + for k in result.top_positive_keywords + ], + top_negative_keywords=[ + {"keyword": k.keyword, "positive_count": k.positive_count, "negative_count": k.negative_count, "lift": k.lift} + for k in result.top_negative_keywords + ], + section_impact=[ + {"section": s.section, "positive_rate": s.positive_rate, "count": s.count} + for s in result.section_impact + ], + has_data=result.has_data, + ) diff --git a/apps/backend/fastapi/app/schemas/insights.py b/apps/backend/fastapi/app/schemas/insights.py new file mode 100644 index 0000000..84eff18 --- /dev/null +++ b/apps/backend/fastapi/app/schemas/insights.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from pydantic import BaseModel + + +class OperationImpactSchema(BaseModel): + operation: str + total: int + positive: int + rate: float + + +class KeywordSignalSchema(BaseModel): + keyword: str + positive_count: int + negative_count: int + lift: float + + +class SectionImpactSchema(BaseModel): + section: str + positive_rate: float + count: int + + +class InsightsResponse(BaseModel): + total_submissions: int + positive_count: int + positive_rate: float + operation_impact: list[OperationImpactSchema] + top_positive_keywords: list[KeywordSignalSchema] + top_negative_keywords: list[KeywordSignalSchema] + section_impact: list[SectionImpactSchema] + has_data: bool diff --git a/apps/backend/fastapi/app/services/insights.py b/apps/backend/fastapi/app/services/insights.py new file mode 100644 index 0000000..a876e78 --- /dev/null +++ b/apps/backend/fastapi/app/services/insights.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from dlib.ai.insights import InsightsResult, SubmissionRecord, SuggestionRecord, analyze +from app.models import AiSuggestion, CvDocument, CvVersion, Submission + + +async def get_insights(session: AsyncSession, *, owner_id: str) -> InsightsResult: + stmt = ( + select(Submission) + .join(Submission.version) + .join(CvVersion.document) + .where(CvDocument.owner_id == owner_id) + .options(selectinload(Submission.suggestions)) + ) + rows = list((await session.execute(stmt)).scalars().all()) + + records = [ + SubmissionRecord( + status=s.status.value, + suggestions=[ + SuggestionRecord( + operation=sug.operation, + target_path=sug.target_path, + proposed_text=sug.proposed_text, + rationale=sug.rationale, + accepted=sug.accepted, + ) + for sug in s.suggestions + ], + ) + for s in rows + ] + return analyze(records) diff --git a/apps/webapp/public/demo-cv.docx b/apps/webapp/public/demo-cv.docx new file mode 100644 index 0000000..e32e6a0 Binary files /dev/null and b/apps/webapp/public/demo-cv.docx differ diff --git a/apps/webapp/src/app/dashboard/demo-data.ts b/apps/webapp/src/app/dashboard/demo-data.ts new file mode 100644 index 0000000..b038e57 --- /dev/null +++ b/apps/webapp/src/app/dashboard/demo-data.ts @@ -0,0 +1,165 @@ +import type { Document, Submission, InsightsResult } from '@/libs/api'; + +const NOW = new Date().toISOString(); +const D = (daysAgo: number) => new Date(Date.now() - daysAgo * 86_400_000).toISOString(); + +const ROOT_VERSION_ID = 'demo-v1'; +const ML_VERSION_ID = 'demo-v2'; +const BACKEND_VERSION_ID = 'demo-v3'; +export const DEMO_DOC_ID = 'demo-doc-1'; + +export const DEMO_DOCUMENTS: Document[] = [ + { + id: DEMO_DOC_ID, + title: 'Alex Rivera — Software Engineer', + description: 'Main CV, ATS-safe baseline', + owner_id: 'demo-user', + root_version_id: ROOT_VERSION_ID, + created_at: D(45), + updated_at: D(3), + versions: [ + { + id: ROOT_VERSION_ID, + branch_name: 'root', + version_label: 'v1.0 baseline', + parent_version_id: null, + structured_blocks: [ + { path: 'heading[1]', block_type: 'heading', text: 'Alex Rivera', keywords: [] }, + { path: 'summary[1]', block_type: 'summary', text: 'Software engineer with 5 years of experience building distributed systems and ML pipelines at scale.', keywords: ['distributed', 'systems', 'machine', 'learning'] }, + { path: 'heading[2]', block_type: 'heading', text: 'Experience', keywords: [] }, + { path: 'bullet[1]', block_type: 'bullet', text: 'Led migration of monolithic data pipeline to distributed microservices, reducing p99 latency by 40%.', keywords: ['distributed', 'microservices', 'latency', 'pipeline'] }, + { path: 'bullet[2]', block_type: 'bullet', text: 'Designed feature flag system used by 50+ engineers across 3 teams.', keywords: ['system', 'design', 'engineers'] }, + { path: 'heading[3]', block_type: 'heading', text: 'Skills', keywords: [] }, + { path: 'skills[1]', block_type: 'skills', text: 'Python, Go, TypeScript, SQL, Kubernetes, AWS, PyTorch', keywords: ['python', 'go', 'typescript', 'pytorch', 'kubernetes'] }, + ], + artifact_docx_key: 'demo/demo-cv.docx', + patches: [], + public_assets: [], + created_at: D(45), + updated_at: D(45), + }, + { + id: ML_VERSION_ID, + branch_name: 'ml-engineer', + version_label: 'ML-focused variant', + parent_version_id: ROOT_VERSION_ID, + structured_blocks: [ + { path: 'heading[1]', block_type: 'heading', text: 'Alex Rivera', keywords: [] }, + { path: 'summary[1]', block_type: 'summary', text: 'ML engineer specialising in large-scale PyTorch training pipelines, distributed inference, and production-grade MLOps.', keywords: ['pytorch', 'distributed', 'mlops', 'inference'] }, + { path: 'heading[2]', block_type: 'heading', text: 'Experience', keywords: [] }, + { path: 'bullet[1]', block_type: 'bullet', text: 'Contributed PyTorch anomaly detection model achieving 92% precision on production traffic at 2M events/day.', keywords: ['pytorch', 'machine learning', 'production', 'precision'] }, + { path: 'bullet[2]', block_type: 'bullet', text: 'Built streaming data ingestion system (Kafka + Flink) powering real-time ML feature store.', keywords: ['kafka', 'flink', 'streaming', 'feature store'] }, + { path: 'heading[3]', block_type: 'heading', text: 'Skills', keywords: [] }, + { path: 'skills[1]', block_type: 'skills', text: 'PyTorch, Python, Go, Kubernetes, Spark, dbt, AWS SageMaker', keywords: ['pytorch', 'python', 'kubernetes', 'spark', 'sagemaker'] }, + ], + artifact_docx_key: 'demo/demo-cv.docx', + patches: [ + { id: 'dp1', target_path: 'summary[1]', operation: 'replace_text', old_value: 'Software engineer…', new_value: 'ML engineer specialising…', created_at: D(30) }, + { id: 'dp2', target_path: 'skills[1]', operation: 'boost_keyword', old_value: null, new_value: 'PyTorch', created_at: D(30) }, + ], + public_assets: [{ + id: 'demo-asset-1', slug: 'alex-ml', artifact_key: 'public/alex-ml.docx', + is_public: true, url: '/demo-cv.docx', version_id: ML_VERSION_ID, submission_id: null, created_at: D(20), + }], + created_at: D(30), + updated_at: D(3), + }, + { + id: BACKEND_VERSION_ID, + branch_name: 'backend-engineer', + version_label: 'Backend-focused variant', + parent_version_id: ROOT_VERSION_ID, + structured_blocks: [ + { path: 'heading[1]', block_type: 'heading', text: 'Alex Rivera', keywords: [] }, + { path: 'summary[1]', block_type: 'summary', text: 'Backend engineer focused on high-throughput API design, distributed systems, and reliability engineering.', keywords: ['backend', 'api', 'distributed', 'reliability'] }, + { path: 'bullet[1]', block_type: 'bullet', text: 'Led migration to microservices, reducing p99 latency by 40% under 10k RPS sustained load.', keywords: ['microservices', 'latency', 'rps', 'distributed'] }, + { path: 'skills[1]', block_type: 'skills', text: 'Go, Python, PostgreSQL, Redis, gRPC, Kubernetes, AWS', keywords: ['go', 'postgresql', 'redis', 'grpc', 'kubernetes'] }, + ], + artifact_docx_key: 'demo/demo-cv.docx', + patches: [ + { id: 'dp3', target_path: 'summary[1]', operation: 'replace_text', old_value: 'Software engineer…', new_value: 'Backend engineer…', created_at: D(25) }, + ], + public_assets: [], + created_at: D(25), + updated_at: D(10), + }, + ], + }, +]; + +export const DEMO_SUBMISSIONS: Submission[] = [ + { + id: 'ds1', version_id: ML_VERSION_ID, company_name: 'Anthropic', role_title: 'ML Research Engineer', + job_url: null, job_description: null, status: 'pending_review', created_at: D(18), + suggestions: [ + { id: 's1', target_path: 'summary[1]', operation: 'boost_keyword', proposed_text: 'constitutional ai', rationale: 'Highlight alignment research experience', accepted: true, metadata_json: { confidence: 0.82 } }, + { id: 's2', target_path: 'bullet[1]', operation: 'replace_text', proposed_text: 'Built distributed PyTorch training pipeline handling constitutional AI fine-tuning at scale.', rationale: 'Align with Anthropic stack', accepted: true, metadata_json: { confidence: 0.74 } }, + ], + }, + { + id: 'ds2', version_id: ML_VERSION_ID, company_name: 'Google DeepMind', role_title: 'Senior ML Engineer', + job_url: null, job_description: null, status: 'pending_review', created_at: D(14), + suggestions: [ + { id: 's3', target_path: 'skills[1]', operation: 'boost_keyword', proposed_text: 'JAX', rationale: 'DeepMind uses JAX heavily', accepted: true, metadata_json: { confidence: 0.71 } }, + { id: 's4', target_path: 'bullet[2]', operation: 'replace_text', proposed_text: 'Built large-scale streaming pipeline underpinning real-time feature store for JAX model serving.', rationale: 'Add JAX context', accepted: true, metadata_json: { confidence: 0.68 } }, + ], + }, + { + id: 'ds3', version_id: ML_VERSION_ID, company_name: 'OpenAI', role_title: 'Research Engineer', + job_url: null, job_description: null, status: 'published', created_at: D(10), + suggestions: [ + { id: 's5', target_path: 'summary[1]', operation: 'replace_text', proposed_text: 'ML engineer with track record in large-scale training infrastructure and RLHF pipelines.', rationale: 'OpenAI focus on RLHF', accepted: true, metadata_json: { confidence: 0.77 } }, + ], + }, + { + id: 'ds4', version_id: ML_VERSION_ID, company_name: 'Meta AI', role_title: 'ML Infrastructure Engineer', + job_url: null, job_description: null, status: 'archived', created_at: D(22), + suggestions: [ + { id: 's6', target_path: 'bullet[1]', operation: 'boost_keyword', proposed_text: 'PyTorch', rationale: 'Meta maintains PyTorch', accepted: true, metadata_json: { confidence: 0.55 } }, + { id: 's7', target_path: 'summary[1]', operation: 'suppress_block', proposed_text: null, rationale: 'Summary too generic', accepted: false, metadata_json: { confidence: 0.3 } }, + ], + }, + { + id: 'ds5', version_id: BACKEND_VERSION_ID, company_name: 'Stripe', role_title: 'Senior Backend Engineer', + job_url: null, job_description: null, status: 'pending_review', created_at: D(8), + suggestions: [ + { id: 's8', target_path: 'bullet[1]', operation: 'replace_text', proposed_text: 'Led migration to microservices achieving 99.99% uptime across Stripe-scale payment processing.', rationale: 'Emphasise reliability', accepted: true, metadata_json: { confidence: 0.79 } }, + ], + }, + { + id: 'ds6', version_id: BACKEND_VERSION_ID, company_name: 'Cloudflare', role_title: 'Staff Engineer', + job_url: null, job_description: null, status: 'archived', created_at: D(20), + suggestions: [ + { id: 's9', target_path: 'skills[1]', operation: 'boost_keyword', proposed_text: 'Rust', rationale: 'Cloudflare uses Rust', accepted: true, metadata_json: { confidence: 0.4 } }, + ], + }, +]; + +export const DEMO_INSIGHTS: InsightsResult = { + total_submissions: 6, + positive_count: 4, + positive_rate: 0.667, + has_data: true, + operation_impact: [ + { operation: 'replace_text', total: 5, positive: 4, rate: 0.8 }, + { operation: 'boost_keyword', total: 5, positive: 3, rate: 0.6 }, + { operation: 'suppress_block', total: 1, positive: 0, rate: 0.0 }, + ], + top_positive_keywords: [ + { keyword: 'pytorch', positive_count: 4, negative_count: 1, lift: 4.0 }, + { keyword: 'distributed', positive_count: 3, negative_count: 0, lift: 3.0 }, + { keyword: 'pipeline', positive_count: 3, negative_count: 1, lift: 3.0 }, + { keyword: 'scale', positive_count: 3, negative_count: 1, lift: 3.0 }, + { keyword: 'reliability', positive_count: 2, negative_count: 0, lift: 2.0 }, + { keyword: 'inference', positive_count: 2, negative_count: 0, lift: 2.0 }, + ], + top_negative_keywords: [ + { keyword: 'generic', positive_count: 0, negative_count: 2, lift: 0.0 }, + { keyword: 'suppress', positive_count: 0, negative_count: 1, lift: 0.0 }, + ], + section_impact: [ + { section: 'summary', positive_rate: 0.83, count: 6 }, + { section: 'bullet', positive_rate: 0.75, count: 4 }, + { section: 'skills', positive_rate: 0.5, count: 4 }, + ], +}; diff --git a/apps/webapp/src/app/dashboard/page.tsx b/apps/webapp/src/app/dashboard/page.tsx index 7054698..a4c26ab 100644 --- a/apps/webapp/src/app/dashboard/page.tsx +++ b/apps/webapp/src/app/dashboard/page.tsx @@ -3,12 +3,15 @@ import { useEffect, useRef, useState } from 'react'; import CVTree from '@/components/cv/CVTree'; import DiffViewer from '@/components/cv/DiffViewer'; +import InsightsPanel from '@/components/cv/InsightsPanel'; import Link from 'next/link'; import { appendPatches, createBranch, createSubmission, deleteDocument, deleteVersion, Document, downloadVersionUrl, - fetchDocuments, fetchSubmissions, fetchPublicAssetAnalytics, getPublicPdfUrl, + fetchDocuments, fetchInsights, fetchSubmissions, fetchPublicAssetAnalytics, getPublicPdfUrl, + InsightsResult, + IS_DEMO, publishVersion, PublicAsset, PublicAssetAnalytics, requestAiSuggestions, Submission, @@ -20,6 +23,9 @@ import { uploadDocument, Version, } from '@/libs/api'; +import { + DEMO_DOCUMENTS, DEMO_DOC_ID, DEMO_INSIGHTS, DEMO_SUBMISSIONS, +} from './demo-data'; // ── helpers ─────────────────────────────────────────────────────────────────── @@ -548,7 +554,7 @@ function SubmissionsTab({ // ── main dashboard ──────────────────────────────────────────────────────────── type Modal = 'upload' | 'branch' | 'submission' | 'publish' | null; -type Tab = 'content' | 'patches' | 'submissions'; +type Tab = 'content' | 'patches' | 'submissions' | 'insights'; export default function Dashboard() { const [docs, setDocs] = useState([]); @@ -568,8 +574,17 @@ export default function Dashboard() { const [docHovered, setDocHovered] = useState(null); const [applyLoading, setApplyLoading] = useState(false); const [applyError, setApplyError] = useState(''); + const [insights, setInsights] = useState(null); useEffect(() => { + if (IS_DEMO) { + setDocs(DEMO_DOCUMENTS); + setAllSubmissions(DEMO_SUBMISSIONS); + setSelectedDocId(DEMO_DOC_ID); + setInsights(DEMO_INSIGHTS); + setLoading(false); + return; + } Promise.all([fetchDocuments(), fetchSubmissions().catch(() => [])]) .then(([d, allSubs]) => { setDocs(d); @@ -580,6 +595,11 @@ export default function Dashboard() { .finally(() => setLoading(false)); }, []); + useEffect(() => { + if (IS_DEMO || !selectedDocId) return; + fetchInsights().then(setInsights).catch(() => setInsights(null)); + }, [selectedDocId]); + useEffect(() => { setPendingEdits(new Map()); setApplyError(''); @@ -691,6 +711,7 @@ export default function Dashboard() { }; const handleDeleteDoc = async (docId: string) => { + if (IS_DEMO) return; if (!confirm('Delete this CV and all its branches? This cannot be undone.')) return; try { await deleteDocument(docId); @@ -706,6 +727,7 @@ export default function Dashboard() { }; const handleDeleteVersion = async (versionId: string) => { + if (IS_DEMO) return; const hasChildren = selectedDoc?.versions.some(v => v.parent_version_id === versionId); const msg = hasChildren ? 'Delete this branch and all its sub-branches? This cannot be undone.' @@ -758,12 +780,21 @@ export default function Dashboard() {
- - + {IS_DEMO && ( + + DEMO + + )} + {!IS_DEMO && ( + + )} + {!IS_DEMO && ( + + )}
@@ -900,6 +931,13 @@ export default function Dashboard() { onSelect={selectVersion} /> + + {insights?.has_data && ( +
+
NLP insights
+ +
+ )} ) : (
@@ -938,10 +976,15 @@ export default function Dashboard() { {/* action buttons */}
- - - - {selectedVersion.artifact_docx_key && selectedDoc && ( + {!IS_DEMO && } + {!IS_DEMO && } + {!IS_DEMO && } + {IS_DEMO && ( + + ↓ DOCX + + )} + {!IS_DEMO && selectedVersion.artifact_docx_key && selectedDoc && ( ↓ DOCX @@ -1044,7 +1087,7 @@ export default function Dashboard() { {/* tabs */}
- {(['content', 'patches', 'submissions'] as Tab[]).map(t => ( + {(['content', 'patches', 'submissions', 'insights'] as Tab[]).map(t => (
)} diff --git a/apps/webapp/src/components/cv/InsightsPanel.tsx b/apps/webapp/src/components/cv/InsightsPanel.tsx new file mode 100644 index 0000000..bbdcd8c --- /dev/null +++ b/apps/webapp/src/components/cv/InsightsPanel.tsx @@ -0,0 +1,134 @@ +'use client'; + +import type { InsightsResult } from '@/libs/api'; + +function Bar({ rate, positive }: { rate: number; positive?: boolean }) { + return ( +
+
= 0.6 ? '#22c55e' : rate >= 0.4 ? '#f59e0b' : '#94a3b8', + borderRadius: 3, + transition: 'width 0.3s', + }} /> +
+ ); +} + +function Pct({ v }: { v: number }) { + return = 0.6 ? '#16a34a' : v >= 0.4 ? '#d97706' : '#6b7280' }}>{Math.round(v * 100)}%; +} + +export default function InsightsPanel({ data }: { data: InsightsResult | null }) { + if (!data) return ( +
+ Loading insights… +
+ ); + + if (!data.has_data) return ( +
+ Not enough data yet. Submit applications and mark outcomes to unlock insights. +
+ ); + + return ( +
+ {/* headline numbers */} +
+ {[ + { label: 'Total submissions', value: data.total_submissions }, + { label: 'Passed screening', value: data.positive_count }, + { label: 'Screening rate', value: `${Math.round(data.positive_rate * 100)}%` }, + ].map(({ label, value }) => ( +
+
{label}
+
{value}
+
+ ))} +
+ + {/* operation impact */} + {data.operation_impact.length > 0 && ( +
+
Patch operation impact
+
+ {data.operation_impact.map(op => ( +
+ + {op.operation} + + + + + {op.positive}/{op.total} + +
+ ))} +
+

+ % of accepted patches of this type in submissions that passed screening. +

+
+ )} + + {/* section impact */} + {data.section_impact.length > 0 && ( +
+
CV section impact
+
+ {data.section_impact.map(s => ( +
+ + {s.section} + + + + + {s.count} edits + +
+ ))} +
+
+ )} + + {/* keyword signals */} + {(data.top_positive_keywords.length > 0 || data.top_negative_keywords.length > 0) && ( +
+
Keyword signals
+
+
+
Positive signals
+
+ {data.top_positive_keywords.map(k => ( +
+ {k.keyword} + +{k.positive_count} ({k.lift}×) +
+ ))} +
+
+
+
Negative signals
+
+ {data.top_negative_keywords.length === 0 + ? None yet + : data.top_negative_keywords.map(k => ( +
+ {k.keyword} + {k.negative_count}× +
+ ))} +
+
+
+

+ Keywords extracted from accepted AI suggestions, split by outcome. +

+
+ )} +
+ ); +} diff --git a/apps/webapp/src/libs/api.ts b/apps/webapp/src/libs/api.ts index 736f4a9..6260fe8 100644 --- a/apps/webapp/src/libs/api.ts +++ b/apps/webapp/src/libs/api.ts @@ -1,4 +1,5 @@ const API = ""; +export const IS_DEMO = process.env.NEXT_PUBLIC_DEMO === 'true'; export type StructuredBlock = { path: string; @@ -87,6 +88,21 @@ export type PublicAssetAnalytics = { last_viewed_at?: string | null; }; +export type OperationImpact = { operation: string; total: number; positive: number; rate: number }; +export type KeywordSignal = { keyword: string; positive_count: number; negative_count: number; lift: number }; +export type SectionImpact = { section: string; positive_rate: number; count: number }; + +export type InsightsResult = { + total_submissions: number; + positive_count: number; + positive_rate: number; + operation_impact: OperationImpact[]; + top_positive_keywords: KeywordSignal[]; + top_negative_keywords: KeywordSignal[]; + section_impact: SectionImpact[]; + has_data: boolean; +}; + // reads OIDC bearer token from client-readable cookie (set by /api/auth/callback) function getAuthHeader(): Record { if (typeof document === 'undefined') return {}; @@ -238,6 +254,9 @@ export async function deleteDocument(documentId: string): Promise { } } +export const fetchInsights = (): Promise => + req('/api/v1/insights'); + export async function deleteVersion(versionId: string): Promise { const res = await fetch(`${API}/api/v1/versions/${versionId}`, { method: 'DELETE', diff --git a/dlib/ai/insights.py b/dlib/ai/insights.py new file mode 100644 index 0000000..db87003 --- /dev/null +++ b/dlib/ai/insights.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import re +from collections import Counter, defaultdict +from dataclasses import dataclass, field +from typing import Literal + +STOPWORDS = frozenset( + "a an the and or but in on at to for of with is are was were be been have has" + " had do does did this that these those it its i you he she we they their our" + " your my his her from by into through about as so if then when where which who" + " can will may should would could also just not no more some all any each" + " than other up out off over how what new using use used with well per".split() +) + +Outcome = Literal["positive", "negative"] # positive = pending_review / published + + +@dataclass +class SuggestionRecord: + operation: str + target_path: str + proposed_text: str | None + rationale: str | None + accepted: bool | None + + +@dataclass +class SubmissionRecord: + status: str + suggestions: list[SuggestionRecord] = field(default_factory=list) + + +@dataclass +class OperationImpact: + operation: str + total: int + positive: int + rate: float + + +@dataclass +class KeywordSignal: + keyword: str + positive_count: int + negative_count: int + lift: float # positive_count / max(negative_count, 1) + + +@dataclass +class SectionImpact: + section: str + positive_rate: float + count: int + + +@dataclass +class InsightsResult: + total_submissions: int + positive_count: int + positive_rate: float + operation_impact: list[OperationImpact] + top_positive_keywords: list[KeywordSignal] + top_negative_keywords: list[KeywordSignal] + section_impact: list[SectionImpact] + has_data: bool + + +def _outcome(status: str) -> Outcome | None: + if status in ("pending_review", "published"): + return "positive" + if status == "archived": + return "negative" + return None # draft / tailoring — not enough signal + + +def _tokens(text: str | None) -> list[str]: + if not text: + return [] + return [ + t for t in re.findall(r"[a-z][a-z0-9+.-]{1,}", text.lower()) + if t not in STOPWORDS and len(t) > 2 + ] + + +def _section_prefix(path: str) -> str: + """heading[1] -> heading, bullet[3] -> bullet, table[1].0-1 -> table""" + return re.match(r"([a-z_]+)", path).group(1) if path else "unknown" + + +def analyze(submissions: list[SubmissionRecord]) -> InsightsResult: + labeled = [(s, _outcome(s.status)) for s in submissions] + labeled_known = [(s, o) for s, o in labeled if o is not None] + + positive_count = sum(1 for _, o in labeled_known if o == "positive") + + # operation impact: only accepted suggestions in outcome-labeled submissions + op_positive: Counter[str] = Counter() + op_total: Counter[str] = Counter() + for sub, outcome in labeled_known: + for sug in sub.suggestions: + if sug.accepted is not True: + continue + op_total[sug.operation] += 1 + if outcome == "positive": + op_positive[sug.operation] += 1 + + op_impact = sorted( + [ + OperationImpact( + operation=op, + total=total, + positive=op_positive[op], + rate=round(op_positive[op] / total, 3), + ) + for op, total in op_total.items() + ], + key=lambda x: x.rate, + reverse=True, + ) + + # keyword signals from accepted-suggestion text in outcome-labeled submissions + kw_pos: Counter[str] = Counter() + kw_neg: Counter[str] = Counter() + for sub, outcome in labeled_known: + bucket = kw_pos if outcome == "positive" else kw_neg + for sug in sub.suggestions: + if sug.accepted is not True: + continue + for t in _tokens(sug.proposed_text) + _tokens(sug.rationale): + bucket[t] += 1 + + all_kws = set(kw_pos) | set(kw_neg) + signals = [ + KeywordSignal( + keyword=kw, + positive_count=kw_pos[kw], + negative_count=kw_neg[kw], + lift=round(kw_pos[kw] / max(kw_neg[kw], 1), 2), + ) + for kw in all_kws + if kw_pos[kw] + kw_neg[kw] >= 2 # minimum support + ] + top_pos_kw = sorted( + [s for s in signals if s.positive_count > 0], + key=lambda s: (s.lift, s.positive_count), + reverse=True, + )[:8] + top_neg_kw = sorted( + [s for s in signals if s.negative_count > 0], + key=lambda s: (s.negative_count, -s.lift), + reverse=True, + )[:8] + + # section impact: group target_path prefix by outcome + sec_pos: Counter[str] = Counter() + sec_total: Counter[str] = Counter() + for sub, outcome in labeled_known: + for sug in sub.suggestions: + if sug.accepted is not True: + continue + sec = _section_prefix(sug.target_path) + sec_total[sec] += 1 + if outcome == "positive": + sec_pos[sec] += 1 + + section_impact = sorted( + [ + SectionImpact( + section=sec, + positive_rate=round(sec_pos[sec] / total, 3), + count=total, + ) + for sec, total in sec_total.items() + ], + key=lambda s: s.positive_rate, + reverse=True, + ) + + return InsightsResult( + total_submissions=len(submissions), + positive_count=positive_count, + positive_rate=round(positive_count / len(submissions), 3) if submissions else 0.0, + operation_impact=op_impact, + top_positive_keywords=top_pos_kw, + top_negative_keywords=top_neg_kw, + section_impact=section_impact, + has_data=bool(labeled_known), + ) diff --git a/scripts/gen_demo_cv.py b/scripts/gen_demo_cv.py new file mode 100644 index 0000000..fbb055d --- /dev/null +++ b/scripts/gen_demo_cv.py @@ -0,0 +1,87 @@ +"""Generate the static demo CV DOCX used by DEMO mode in the webapp.""" +from __future__ import annotations + +import sys +from pathlib import Path + +from docx import Document +from docx.shared import Pt, RGBColor +from docx.enum.text import WD_ALIGN_PARAGRAPH + + +def add_heading(doc: Document, text: str, level: int = 1) -> None: + p = doc.add_heading(text, level=level) + p.alignment = WD_ALIGN_PARAGRAPH.LEFT + + +def add_bullet(doc: Document, text: str) -> None: + doc.add_paragraph(text, style="List Bullet") + + +def build(path: Path) -> None: + doc = Document() + + # Name / contact + name_para = doc.add_paragraph() + name_para.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = name_para.add_run("Alex Rivera") + run.bold = True + run.font.size = Pt(18) + + contact = doc.add_paragraph() + contact.alignment = WD_ALIGN_PARAGRAPH.CENTER + contact.add_run("alex.rivera@email.com · linkedin.com/in/alexrivera · github.com/alexrivera") + + doc.add_paragraph() # spacer + + # Summary + add_heading(doc, "Summary", level=2) + doc.add_paragraph( + "Software engineer with 5 years of experience building distributed systems and " + "machine learning pipelines at scale. Strong background in Python, Go, and cloud-native " + "architectures. Passionate about developer tooling and open-source contribution." + ) + + # Experience + add_heading(doc, "Experience", level=2) + + add_heading(doc, "Senior Software Engineer — Acme Corp", level=3) + doc.add_paragraph("Jan 2022 – Present · San Francisco, CA") + add_bullet(doc, "Led migration of monolithic data pipeline to distributed microservices, reducing p99 latency by 40%.") + add_bullet(doc, "Designed and shipped an internal feature flag system used by 50+ engineers across 3 teams.") + add_bullet(doc, "Mentored 4 junior engineers and ran weekly technical design review sessions.") + + add_heading(doc, "Software Engineer — DataFlow Inc", level=3) + doc.add_paragraph("Aug 2019 – Dec 2021 · Remote") + add_bullet(doc, "Built real-time streaming ingestion system processing 2M events/day using Kafka and Flink.") + add_bullet(doc, "Developed Python SDK for internal data platform, adopted by 8 product teams.") + add_bullet(doc, "Contributed PyTorch-based anomaly detection model achieving 92% precision on production traffic.") + + # Education + add_heading(doc, "Education", level=2) + add_heading(doc, "B.S. Computer Science — State University", level=3) + doc.add_paragraph("Graduated May 2019 · GPA 3.8 / 4.0") + add_bullet(doc, "Senior thesis: Efficient approximate nearest-neighbour search for high-dimensional embeddings.") + + # Skills + add_heading(doc, "Skills", level=2) + skills_para = doc.add_paragraph() + skills_para.add_run("Languages: ").bold = True + skills_para.add_run("Python, Go, TypeScript, SQL") + + infra_para = doc.add_paragraph() + infra_para.add_run("Infrastructure: ").bold = True + infra_para.add_run("Kubernetes, AWS, GCP, Terraform, Docker") + + ml_para = doc.add_paragraph() + ml_para.add_run("ML / Data: ").bold = True + ml_para.add_run("PyTorch, scikit-learn, Spark, Kafka, dbt") + + doc.save(path) + print(f"Saved demo CV to {path}") + + +if __name__ == "__main__": + out = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("apps/webapp/public/demo-cv.docx") + out.parent.mkdir(parents=True, exist_ok=True) + build(out)