Coverage for src / documint_mcp / repository.py: 0%
1194 statements
« prev ^ index » next — coverage.py v7.13.5, created at 2026-03-30 22:30 -0400
1"""Persistent repository-backed services for Documint V1."""
3from __future__ import annotations
5import glob
6import hashlib
7import re
8import secrets
9import shutil
10import subprocess
11from collections.abc import Iterable
12from dataclasses import dataclass
13from datetime import UTC, datetime
14from pathlib import Path
15from typing import Any, cast
16from uuid import uuid4
18import httpx
19import structlog
20from sqlalchemy import delete, desc, select
21from sqlalchemy.orm import Session
23logger = structlog.get_logger(__name__)
25from .config import settings
26from .rag import get_rag
27from .db import (
28 ActivityEventRecord,
29 AgentRunRecord,
30 ApiTokenRecord,
31 ArtifactDefinitionRecord,
32 ArtifactTraceRecord,
33 BackgroundJobRecord,
34 DocPatchRecord,
35 DriftFindingRecord,
36 GitHubInstallationRecord,
37 GitHubRepositoryRecord,
38 GitHubWebhookDeliveryRecord,
39 ProjectRecord,
40 ProjectSettingsRecord,
41 PublishDeploymentRecord,
42 PublishedPageRecord,
43 PullRequestRecord,
44 RepositorySourceRecord,
45 SourceSignalRecord,
46 UserRecord,
47 VerificationRunRecord,
48 WorkspaceMemberRecord,
49 WorkspaceRecord,
50 init_db,
51 reset_db,
52 session_scope,
53)
54from .models import (
55 ActivityEvent,
56 ApiTokenSummary,
57 ArtifactTrace,
58 ArtifactType,
59 AuthenticatedUser,
60 CLIExchangeRequest,
61 DocPatch,
62 DriftFinding,
63 DriftJobRequest,
64 FindingSeverity,
65 GitHubInstallation,
66 GitHubRepository,
67 JobStatus,
68 PatchCitation,
69 Project,
70 ProjectCreateRequest,
71 ProjectSettings,
72 ProjectSnapshot,
73 PublicDocPage,
74 PublishDeployment,
75 PublishedPage,
76 PullRequestCreateRequest,
77 QueuedJob,
78 RepositoryRevision,
79 RepositorySource,
80 RuntimeStatus,
81 SourceSignal,
82 SourceSignalType,
83 TokenCreateRequest,
84 User,
85 VerificationRun,
86 VerificationStatus,
87 Workspace,
88 WorkspaceCreateRequest,
89)
90from .models import (
91 PullRequestRecord as PullRequestModel,
92)
94try:
95 import yaml # type: ignore[import-untyped]
96except ModuleNotFoundError: # pragma: no cover - dependency managed at install time.
97 yaml = None
99from .ai import DraftPatchResult, get_patch_generator
100from .agent_files import AgentFileGenerator
101from .cascade_detector import find_cascades
102from .drift_engine import DriftResult, get_drift_engine
103from .symbol_graph import get_symbol_graph
104from .mint import MintDocument
105from .github import (
106 GitHubPullRequestResult,
107 create_or_update_pull_request,
108)
109from .github import (
110 list_installation_repositories as fetch_installation_repositories,
111)
@dataclass(frozen=True)
class ArtifactSpec:
    """Immutable template describing one documentation artifact tracked for a
    project (see ``DEFAULT_ARTIFACT_SPECS`` for the built-in set)."""

    # Stable identifier used to key the artifact in storage.
    artifact_key: str
    # URL-safe slug used for public docs routing.
    slug: str
    # Human-readable title of the artifact.
    title: str
    # Category of the artifact (API reference, changelog, ...).
    artifact_type: ArtifactType
    # One-sentence description of what the artifact covers.
    summary: str
    # Repo-relative documentation files that render this artifact.
    doc_paths: tuple[str, ...]
    # Glob patterns for the source files the docs are derived from.
    source_patterns: tuple[str, ...]
# Built-in artifact templates used to seed new projects: each spec pairs a
# documentation page under ``content/docs/`` with the source globs that the
# page is derived from (used later for drift detection).
DEFAULT_ARTIFACT_SPECS: tuple[ArtifactSpec, ...] = (
    ArtifactSpec(
        artifact_key="api-reference",
        slug="api-reference",
        title="API Reference",
        artifact_type=ArtifactType.API_REFERENCE,
        summary="HTTP endpoints, project snapshot objects, and repo-backed control plane contracts.",
        doc_paths=("content/docs/api-reference.md",),
        source_patterns=("src/documint_mcp/**/*.py", "pyproject.toml"),
    ),
    ArtifactSpec(
        artifact_key="sdk-guides",
        slug="sdk-guides",
        title="SDK and Integration Guides",
        artifact_type=ArtifactType.SDK_GUIDES,
        summary="Repo-connected workflows for the web app, CLI, onboarding, and integration entrypoints.",
        doc_paths=("content/docs/sdk-guides.md",),
        source_patterns=("apps/web/**/*", "README.md", "package.json"),
    ),
    ArtifactSpec(
        artifact_key="mcp-reference",
        slug="mcp-reference",
        title="MCP Reference",
        artifact_type=ArtifactType.MCP_REFERENCE,
        summary="HTTP MCP tools for agents that inspect projects, drift, patches, publishes, and activity.",
        doc_paths=("content/docs/mcp-reference.md",),
        source_patterns=("src/documint_mcp/**/*.py", "mcp.json", "README.md"),
    ),
    ArtifactSpec(
        artifact_key="changelog",
        slug="changelog",
        title="Release Changelog",
        artifact_type=ArtifactType.CHANGELOG,
        summary="Current product milestones, releases, and repository-level change tracking.",
        doc_paths=("content/docs/changelog.md",),
        source_patterns=("README.md", "docs/**/*.md", ".github/workflows/*.yml"),
    ),
    ArtifactSpec(
        artifact_key="migration-notes",
        slug="migration-notes",
        title="Migration Notes",
        artifact_type=ArtifactType.MIGRATION_NOTES,
        summary="Notes for the shift from the dogfood stack to the persistent hosted-docs platform.",
        doc_paths=("content/docs/migration-notes.md",),
        source_patterns=(
            "README.md",
            "package.json",
            "pyproject.toml",
            "apps/web/**/*",
        ),
    ),
)
# Directory names skipped when scanning the repository: VCS metadata, build
# and test caches, vendored dependencies, and Documint's own working dir.
IGNORED_PATH_PARTS = {
    ".git",
    ".next",
    ".pytest_cache",
    "__pycache__",
    "htmlcov",
    "node_modules",
    ".documint",
}
# File suffixes skipped when scanning the repository.
IGNORED_SUFFIXES = {".pyc"}
190def _utc_from_timestamp(timestamp: int | float) -> datetime:
191 return datetime.fromtimestamp(timestamp, tz=UTC)
194def _now() -> datetime:
195 return datetime.now(tz=UTC)
198def _slugify(value: str) -> str:
199 normalized = re.sub(r"[^a-zA-Z0-9]+", "-", value.strip().lower()).strip("-")
200 return normalized or f"item-{uuid4().hex[:8]}"
203def _revision_to_payload(
204 revision: RepositoryRevision | None,
205) -> dict[str, object] | None:
206 if revision is None:
207 return None
208 return {
209 "ref": revision.ref,
210 "touched_at": revision.touched_at.isoformat(),
211 "committed": revision.committed,
212 }
215def _revision_from_payload(
216 payload: dict[str, object] | None,
217) -> RepositoryRevision | None:
218 if not payload:
219 return None
220 ref = payload.get("ref")
221 touched_at = payload.get("touched_at")
222 committed = payload.get("committed", True)
223 if not isinstance(ref, str) or not isinstance(touched_at, str):
224 return None
225 return RepositoryRevision(
226 ref=ref,
227 touched_at=datetime.fromisoformat(touched_at),
228 committed=bool(committed),
229 )
232def _parse_frontmatter(raw: str) -> tuple[dict[str, str], str]:
233 if not raw.startswith("---\n"):
234 return {}, raw
235 end_marker = "\n---\n"
236 end_index = raw.find(end_marker, 4)
237 if end_index == -1:
238 return {}, raw
239 header_text = raw[4:end_index]
240 body = raw[end_index + len(end_marker) :]
241 metadata: dict[str, str] = {}
242 for line in header_text.splitlines():
243 if ":" not in line:
244 continue
245 key, value = line.split(":", 1)
246 metadata[key.strip()] = value.strip().strip("\"'")
247 return metadata, body.strip()
250def _token_hash(token: str) -> str:
251 return hashlib.sha256(token.encode("utf-8")).hexdigest()
254class DocumintService:
255 """Persistent service layer backed by SQLAlchemy metadata storage."""
    def __init__(self, repo_root: Path) -> None:
        """Bind the service to a repository checkout and prepare storage.

        Initializes the database schema and, when auto-bootstrap is enabled
        in settings, seeds the default records.
        """
        self.repo_root = repo_root.resolve()
        # May be None when git is not installed; surfaced via runtime_status().
        self._git_executable = shutil.which("git")
        self._head_ref = "HEAD"
        self._head_ref_source = "unknown"
        self._dirty_paths: set[str] = set()
        init_db()
        self._ensure_bootstrap_defaults()
    def _ensure_bootstrap_defaults(self) -> None:
        """Seed the default records, but only when auto-bootstrap is enabled."""
        if settings.auto_bootstrap_defaults:
            self._bootstrap_defaults()
    def me(self, user_id: str | None = None) -> AuthenticatedUser:
        """Return the user plus the workspaces they belong to.

        Falls back to ``settings.default_user_id`` when no id is supplied.

        Raises:
            KeyError: If the resolved user id has no record.
        """
        self._ensure_bootstrap_defaults()
        effective_user_id = user_id or settings.default_user_id
        with session_scope() as session:
            user_record = session.get(UserRecord, effective_user_id)
            if user_record is None:
                raise KeyError(f"Unknown user id: {effective_user_id}")
            workspace_records = self._workspace_records_for_user(
                session, effective_user_id
            )
            return AuthenticatedUser(
                user=self._user_model(user_record),
                workspaces=[self._workspace_model(item) for item in workspace_records],
            )
285 def bootstrap_self(self) -> ProjectSnapshot:
286 self._bootstrap_defaults()
287 return self.snapshot(project_id=settings.project_id)
289 def list_workspaces(self, user_id: str | None = None) -> list[Workspace]:
290 self._ensure_bootstrap_defaults()
291 with session_scope() as session:
292 records = (
293 self._workspace_records_for_user(session, user_id)
294 if user_id is not None
295 else session.scalars(
296 select(WorkspaceRecord).order_by(WorkspaceRecord.created_at.asc())
297 ).all()
298 )
299 return [self._workspace_model(item) for item in records]
    def create_workspace(
        self, request: WorkspaceCreateRequest, user_id: str | None = None
    ) -> Workspace:
        """Create a workspace and enroll the creator as its owner.

        The acting user defaults to ``settings.default_user_id``; an activity
        event is recorded alongside the new rows.
        """
        slug = _slugify(request.slug or request.name)
        owner_id = user_id or settings.default_user_id
        with session_scope() as session:
            workspace = WorkspaceRecord(
                id=f"workspace-{uuid4().hex[:12]}",
                slug=slug,
                name=request.name,
                description=request.description,
            )
            session.add(workspace)
            # The creator is always the first member, with the owner role.
            session.add(
                WorkspaceMemberRecord(
                    id=f"wm-{uuid4().hex[:12]}",
                    workspace_id=workspace.id,
                    user_id=owner_id,
                    role="owner",
                )
            )
            self._record_activity(
                session,
                workspace.id,
                None,
                "workspace.created",
                f"Workspace {workspace.name} created",
                request.description
                or "Workspace added through the Documint operator API.",
                {"slug": workspace.slug},
            )
            # Flush before building the API model so DB-generated defaults
            # (e.g. timestamps) are populated on the record.
            session.flush()
            return self._workspace_model(workspace)
    def list_projects(
        self,
        workspace_id: str | None = None,
        user_id: str | None = None,
    ) -> list[Project]:
        """List projects visible to the caller, oldest first.

        When *workspace_id* is given, access to that workspace is enforced and
        results are limited to it; otherwise a non-None *user_id* restricts
        results to workspaces the user is a member of.
        """
        self._ensure_bootstrap_defaults()
        self._refresh_repo_state()
        with session_scope() as session:
            statement = select(ProjectRecord).order_by(ProjectRecord.created_at.asc())
            if workspace_id:
                self._require_workspace_access(session, workspace_id, user_id)
                statement = statement.where(ProjectRecord.workspace_id == workspace_id)
            elif user_id is not None:
                # No explicit workspace filter: fall back to every workspace
                # the user belongs to.
                allowed_workspace_ids = [
                    item.id
                    for item in self._workspace_records_for_user(session, user_id)
                ]
                statement = statement.where(
                    ProjectRecord.workspace_id.in_(allowed_workspace_ids)
                )
            projects = session.scalars(statement).all()
            return [self._project_model(session, record) for record in projects]
    def create_project(
        self,
        request: ProjectCreateRequest,
        user_id: str | None = None,
    ) -> Project:
        """Create a project inside a workspace and wire up its repo source.

        Validates workspace access and, when an installation id is supplied,
        that the GitHub installation belongs to the workspace and (outside
        inline job mode) has the target repository synced. Seeds the project's
        repository source, settings, and artifact definitions from the repo
        config, and records an activity event.

        Raises:
            KeyError: Unknown workspace/installation, or repository not synced.
            PermissionError: Installation owned by a different workspace.
        """
        self._refresh_repo_state()
        slug = _slugify(request.slug or request.name)
        with session_scope() as session:
            workspace = session.get(WorkspaceRecord, request.workspace_id)
            if workspace is None:
                raise KeyError(f"Unknown workspace id: {request.workspace_id}")
            self._require_workspace_access(session, workspace.id, user_id)
            if request.installation_id is not None:
                installation = session.get(
                    GitHubInstallationRecord, request.installation_id
                )
                if installation is None:
                    raise KeyError(
                        f"Unknown installation id: {request.installation_id}"
                    )
                if installation.workspace_id != workspace.id:
                    raise PermissionError(
                        "Installation does not belong to the workspace"
                    )
                repo_full_name = f"{request.owner}/{request.repo}".lower()
                cached_repo = session.scalar(
                    select(GitHubRepositoryRecord).where(
                        GitHubRepositoryRecord.github_installation_id
                        == installation.id,
                        GitHubRepositoryRecord.full_name == repo_full_name,
                    )
                )
                # Inline job mode may proceed without a synced repo cache;
                # every other mode requires the repository row to exist.
                if cached_repo is None and settings.job_execution_mode != "inline":
                    raise KeyError(
                        f"Repository {repo_full_name} is not synced for installation {request.installation_id}"
                    )
            project = ProjectRecord(
                id=f"project-{uuid4().hex[:12]}",
                workspace_id=request.workspace_id,
                github_installation_id=request.installation_id,
                name=request.name,
                slug=slug,
                description=request.description,
                public_url=(
                    f"{settings.public_base_url.rstrip('/')}/p/{workspace.slug}/{slug}/docs"
                ),
                dashboard_url=f"{settings.public_base_url.rstrip('/')}/app/projects",
                onboarding_status="connected",
            )
            session.add(project)
            # Exactly one repository source per project, keyed
            # deterministically off the project id.
            source = RepositorySourceRecord(
                id=f"source-{project.id}",
                project_id=project.id,
                provider="github",
                owner=request.owner,
                repo=request.repo,
                default_branch=request.default_branch,
                local_path=request.local_path or str(self.repo_root),
                current_ref=self._head_ref,
                docs_root=str(settings.docs_content_path.relative_to(self.repo_root)),
                installation_id=request.installation_id,
            )
            session.add(source)
            config = self._load_repo_config()
            session.add(
                ProjectSettingsRecord(
                    id=f"settings-{project.id}",
                    project_id=project.id,
                    docs_root=str(config["docs"]["root"]),
                    config_version=int(config["version"]),
                    config_json=config,
                    ai_policy=dict(config["ai"]),
                    publish_behavior=dict(config["publish"]),
                    pr_behavior=dict(config["pull_requests"]),
                )
            )
            self._upsert_artifact_definitions(session, project.id, config)
            self._record_activity(
                session,
                workspace.id,
                project.id,
                "project.created",
                f"Project {project.name} created",
                f"Connected GitHub repository {request.owner}/{request.repo}.",
                {"slug": slug, "owner": request.owner, "repo": request.repo},
            )
            session.flush()
            return self._project_model(session, project)
447 def get_project(
448 self, project_id: str, user_id: str | None = None
449 ) -> ProjectSnapshot:
450 return self.snapshot(project_id=project_id, user_id=user_id)
    def snapshot(
        self,
        project_id: str | None = None,
        user_id: str | None = None,
    ) -> ProjectSnapshot:
        """Assemble the full dashboard view of a project.

        Bundles the project, its workspace, settings, runtime status,
        artifact traces, latest verification run and deployment, findings,
        pull requests, and recent activity. Defaults to
        ``settings.project_id`` when no project id is supplied.

        Raises:
            KeyError: Unknown project id, or its workspace row is missing.
        """
        self._ensure_bootstrap_defaults()
        self._refresh_repo_state()
        target_project_id = project_id or settings.project_id
        with session_scope() as session:
            project_record = session.get(ProjectRecord, target_project_id)
            if project_record is None:
                raise KeyError(f"Unknown project id: {target_project_id}")
            self._require_project_access(session, target_project_id, user_id)
            workspace_record = session.get(WorkspaceRecord, project_record.workspace_id)
            if workspace_record is None:
                raise KeyError(f"Workspace missing for project {target_project_id}")
            # Sync runtime bookkeeping and make sure artifact definitions
            # exist before collecting the snapshot pieces below.
            self._sync_project_runtime(session, project_record)
            settings_record = session.get(
                ProjectSettingsRecord, f"settings-{project_record.id}"
            )
            self._ensure_project_artifacts(session, project_record.id, settings_record)
            traces = self._artifact_traces_for_project(session, project_record.id)
            findings = self.list_findings(project_record.id)
            latest_run = self._latest_run(session, project_record.id)
            latest_deployment = self._latest_deployment(session, project_record.id)
            pull_requests = self.list_pull_requests(project_record.id)
            activity = self.list_activity(project_record.id)
            return ProjectSnapshot(
                project=self._project_model(session, project_record),
                workspace=self._workspace_model(workspace_record),
                settings=(
                    self._project_settings_model(settings_record)
                    if settings_record
                    else None
                ),
                runtime=self.runtime_status(),
                artifacts=traces,
                latest_run=latest_run,
                latest_deployment=latest_deployment,
                findings=findings,
                pull_requests=pull_requests,
                activity=activity,
            )
    def list_sources(self, user_id: str | None = None) -> list[RepositorySource]:
        """List repository sources, oldest first.

        When *user_id* is given, results are limited to sources whose project
        belongs to one of the user's workspaces.
        """
        self._ensure_bootstrap_defaults()
        self._refresh_repo_state()
        with session_scope() as session:
            statement = select(RepositorySourceRecord).order_by(
                RepositorySourceRecord.created_at.asc()
            )
            if user_id is not None:
                # Resolve the user's workspaces, then the projects in them,
                # and keep only sources attached to those projects.
                allowed_project_ids = [
                    item.id
                    for item in session.scalars(
                        select(ProjectRecord).where(
                            ProjectRecord.workspace_id.in_(
                                [
                                    workspace.id
                                    for workspace in self._workspace_records_for_user(
                                        session, user_id
                                    )
                                ]
                            )
                        )
                    ).all()
                ]
                statement = statement.where(
                    RepositorySourceRecord.project_id.in_(allowed_project_ids)
                )
            return [
                self._source_model(record)
                for record in session.scalars(statement).all()
            ]
    def runtime_status(self) -> RuntimeStatus:
        """Summarize deployment and runtime configuration for diagnostics.

        Built from ``settings`` plus the repo state captured by
        ``_refresh_repo_state`` (git availability and the current ref).
        """
        self._refresh_repo_state()
        return RuntimeStatus(
            environment=settings.deployment_environment,
            deployment_provider=settings.deployment_provider,
            job_execution_mode=settings.job_execution_mode,
            api_base_url=settings.api_base_url,
            public_base_url=settings.public_base_url,
            app_base_url=settings.app_base_url,
            repo_root=str(self.repo_root),
            docs_root=str(settings.docs_content_path),
            git_available=self._git_executable is not None,
            git_metadata_available=(self.repo_root / ".git").exists(),
            current_ref=self._head_ref,
            current_ref_source=self._head_ref_source,
            # Auth is enforced only outside debug mode, and only when a
            # static token or a Clerk JWKS endpoint is configured.
            auth_required=bool(
                (settings.auth_token or settings.clerk_jwks_url) and not settings.debug
            ),
            github_app_ready=bool(
                settings.github_app_id
                and settings.github_app_slug
                and settings.github_webhook_secret
            ),
            deploy_branch=settings.deploy_branch,
            database_configured=bool(settings.database_url),
            clerk_configured=bool(settings.clerk_jwks_url and settings.clerk_issuer),
            huggingface_configured=bool(
                settings.hf_api_token
                and settings.hf_primary_model
                and settings.hf_fast_model
            ),
        )
    def run_drift(
        self,
        request: DriftJobRequest,
        user_id: str | None = None,
    ) -> VerificationRun:
        """Run a drift check for a project and persist its results.

        Records the triggering source signal, recomputes all findings,
        replaces the project's stored findings wholesale, and returns a
        completed run whose reported findings are scoped to the signal's
        changed files.

        Raises:
            KeyError: If the project id is unknown.
        """
        self._ensure_bootstrap_defaults()
        self._refresh_repo_state()
        with session_scope() as session:
            project_record = session.get(ProjectRecord, request.project_id)
            if project_record is None:
                raise KeyError(f"Unknown project id: {request.project_id}")
            self._require_project_access(session, request.project_id, user_id)
            self._sync_project_runtime(session, project_record)
            settings_record = session.get(
                ProjectSettingsRecord, f"settings-{project_record.id}"
            )
            self._ensure_project_artifacts(session, project_record.id, settings_record)
            # When the caller did not pin changed files, fall back to the
            # worktree's current changes.
            signal_record = SourceSignalRecord(
                id=f"signal-{uuid4().hex[:12]}",
                project_id=project_record.id,
                signal_type=request.signal_type.value,
                ref=self._head_ref,
                changed_files=(
                    self._worktree_changed_files()
                    if request.changed_files is None
                    else list(request.changed_files)
                ),
            )
            session.add(signal_record)
            session.flush()

            traces = self._artifact_traces_for_project(session, project_record.id)
            all_findings = self._compute_findings(session, project_record.id, traces)
            # All findings are stored, but the run reports only those
            # relevant to this signal's changed files.
            filtered_findings = self._filter_findings(
                all_findings, signal_record.changed_files
            )
            run_record = VerificationRunRecord(
                id=f"run-{uuid4().hex[:12]}",
                project_id=project_record.id,
                signal_id=signal_record.id,
                status=JobStatus.COMPLETED.value,
                findings_count=len(filtered_findings),
                started_at=_now(),
                completed_at=_now(),
            )
            session.add(run_record)
            # Replace prior findings for the project wholesale: delete, then
            # re-insert the freshly computed set.
            session.execute(
                delete(DriftFindingRecord).where(
                    DriftFindingRecord.project_id == project_record.id
                )
            )
            for finding in all_findings:
                session.add(
                    DriftFindingRecord(
                        id=finding.id,
                        project_id=project_record.id,
                        verification_run_id=run_record.id,
                        artifact_key=finding.artifact_id,
                        artifact_type=finding.artifact_type.value,
                        severity=finding.severity.value,
                        summary=finding.summary,
                        rationale=finding.rationale,
                        source_paths=finding.source_paths,
                        doc_paths=finding.doc_paths,
                        suggested_actions=finding.suggested_actions,
                        source_revision=_revision_to_payload(finding.source_revision),
                        doc_revision=_revision_to_payload(finding.doc_revision),
                        status=finding.status,
                        changed_symbols=finding.changed_symbols,
                    )
                )
            self._record_activity(
                session,
                project_record.workspace_id,
                project_record.id,
                "verification.completed",
                f"Drift check completed for {project_record.name}",
                f"{len(all_findings)} open findings, {len(filtered_findings)} returned for this signal scope.",
                {
                    "signal_type": request.signal_type.value,
                    "changed_files": signal_record.changed_files,
                    "findings_count": len(all_findings),
                },
            )
            session.flush()
            return VerificationRun(
                id=run_record.id,
                project_id=project_record.id,
                status=JobStatus(run_record.status),
                signal=SourceSignal(
                    id=signal_record.id,
                    type=SourceSignalType(signal_record.signal_type),
                    ref=signal_record.ref,
                    changed_files=list(signal_record.changed_files),
                    created_at=signal_record.created_at,
                ),
                findings_count=len(filtered_findings),
                findings=filtered_findings,
                started_at=run_record.started_at,
                completed_at=run_record.completed_at,
            )
662 def list_findings(
663 self, project_id: str, user_id: str | None = None
664 ) -> list[DriftFinding]:
665 with session_scope() as session:
666 self._require_project_access(session, project_id, user_id)
667 findings = session.scalars(
668 select(DriftFindingRecord)
669 .where(DriftFindingRecord.project_id == project_id)
670 .order_by(
671 desc(DriftFindingRecord.created_at),
672 DriftFindingRecord.severity.asc(),
673 )
674 ).all()
675 return [self._finding_model(item) for item in findings]
    def generate_doc_patch(
        self,
        artifact_id: str | None = None,
        finding_id: str | None = None,
        project_id: str | None = None,
        policy: str = "on_demand",
        user_id: str | None = None,
    ) -> DocPatch:
        """Draft an AI-generated doc patch for an artifact or a finding.

        At least one of *artifact_id* / *finding_id* must be provided; when
        only a finding is given, the artifact is resolved from it. The draft
        is stored and an activity event is recorded. *project_id* defaults to
        ``settings.project_id``.

        Raises:
            KeyError: Both ids missing, unknown project, or the artifact
                cannot be resolved.
        """
        target_project_id = project_id or settings.project_id
        if finding_id is None and artifact_id is None:
            raise KeyError("Either artifact_id or finding_id is required")

        with session_scope() as session:
            project_record = session.get(ProjectRecord, target_project_id)
            if project_record is None:
                raise KeyError(f"Unknown project id: {target_project_id}")
            self._require_project_access(session, target_project_id, user_id)
            finding = (
                session.get(DriftFindingRecord, finding_id)
                if finding_id is not None
                else None
            )
            # Prefer the explicit artifact id; fall back to the finding's.
            effective_artifact_id = artifact_id or (
                finding.artifact_key if finding is not None else None
            )
            if effective_artifact_id is None:
                raise KeyError("Artifact id could not be resolved")
            trace = self.get_artifact_trace(effective_artifact_id, target_project_id)
            # The first doc path is treated as the primary document; an
            # artifact without doc paths drafts against an empty document.
            current_doc = (
                self._read_doc_path(trace.doc_paths[0]) if trace.doc_paths else ""
            )
            source_content = self._read_source_content(trace.source_paths)
            patch_result = get_patch_generator().draft_patch(
                project=self._project_model(session, project_record),
                trace=trace,
                finding=self._finding_model(finding) if finding is not None else None,
                current_doc=current_doc,
                source_content=source_content,
                project_settings=self._project_settings_model(
                    session.get(ProjectSettingsRecord, f"settings-{project_record.id}")
                ),
                policy=policy,
            )
            patch_record = self._store_patch(
                session,
                project_record.id,
                effective_artifact_id,
                finding.id if finding is not None else None,
                (
                    trace.doc_paths[0]
                    if trace.doc_paths
                    else f"{settings.docs_content_path}/todo.md"
                ),
                patch_result,
            )
            self._record_activity(
                session,
                project_record.workspace_id,
                project_record.id,
                "patch.generated",
                f"Patch draft created for {trace.title}",
                patch_result.summary,
                {
                    "artifact_id": trace.id,
                    "finding_id": finding.id if finding is not None else None,
                    "provider": patch_result.ai_provider,
                    "model": patch_result.model_name,
                },
            )
            return self._patch_model(patch_record)
748 def get_patch(
749 self, project_id: str, patch_id: str, user_id: str | None = None
750 ) -> DocPatch:
751 with session_scope() as session:
752 self._require_project_access(session, project_id, user_id)
753 patch = session.get(DocPatchRecord, patch_id)
754 if patch is None or patch.project_id != project_id:
755 raise KeyError(f"Unknown patch id: {patch_id}")
756 return self._patch_model(patch)
758 def list_patches(
759 self, project_id: str, user_id: str | None = None
760 ) -> list[DocPatch]:
761 with session_scope() as session:
762 self._require_project_access(session, project_id, user_id)
763 patches = session.scalars(
764 select(DocPatchRecord)
765 .where(DocPatchRecord.project_id == project_id)
766 .order_by(desc(DocPatchRecord.created_at))
767 ).all()
768 return [self._patch_model(item) for item in patches]
    def approve_patch(
        self,
        project_id: str,
        patch_id: str,
        user_id: str | None = None,
    ) -> DocPatch:
        """Mark a patch as approved and store it in RAG for style learning.

        Raises:
            KeyError: If the patch is missing or owned by another project.
        """
        with session_scope() as session:
            self._require_project_access(session, project_id, user_id)
            patch_record = session.get(DocPatchRecord, patch_id)
            if patch_record is None or patch_record.project_id != project_id:
                raise KeyError(f"Unknown patch id: {patch_id}")
            patch_record.status = "approved"
            session.flush()

            # Store in RAG for few-shot style learning
            rag = get_rag()
            rag.store_approved_patch(
                patch_id=patch_record.id,
                artifact_id=patch_record.artifact_key,
                project_id=project_id,
                section_titles=list(patch_record.proposed_sections),
                patch_content=patch_record.preview_markdown,
            )

            # Tolerate a missing project row; the approval itself still
            # stands, only the activity event is skipped.
            project_record = session.get(ProjectRecord, project_id)
            if project_record is not None:
                self._record_activity(
                    session,
                    project_record.workspace_id,
                    project_id,
                    "patch.approved",
                    f"Patch approved for {patch_record.artifact_key}",
                    patch_record.summary,
                    {"patch_id": patch_id, "artifact_id": patch_record.artifact_key},
                )

            return self._patch_model(patch_record)
    def open_pull_request(
        self,
        project_id: str,
        patch_id: str,
        request: PullRequestCreateRequest | None = None,
        user_id: str | None = None,
    ) -> PullRequestModel:
        """Open (or refresh) the GitHub review PR for a patch.

        The branch name is derived deterministically from the project slug
        and artifact key, so re-running updates the existing open PR row
        instead of creating a duplicate.

        Raises:
            KeyError: Unknown patch, project, or missing repository source.
            ValueError: If the patch fails review checks (the patch is also
                marked ``blocked``).
        """
        with session_scope() as session:
            self._require_project_access(session, project_id, user_id)
            patch_record = session.get(DocPatchRecord, patch_id)
            if patch_record is None or patch_record.project_id != project_id:
                raise KeyError(f"Unknown patch id: {patch_id}")
            patch_review_error = self._patch_review_error(patch_record)
            if patch_review_error is not None:
                patch_record.status = "blocked"
                raise ValueError(patch_review_error)
            project_record = session.get(ProjectRecord, project_id)
            if project_record is None:
                raise KeyError(f"Unknown project id: {project_id}")
            source = session.get(RepositorySourceRecord, f"source-{project_id}")
            if source is None:
                raise KeyError(f"Missing source for project: {project_id}")
            # One stable branch per (project, artifact) pair.
            branch_name = f"documint/{project_record.slug}/{patch_record.artifact_key}"
            existing = session.scalar(
                select(PullRequestRecord).where(
                    PullRequestRecord.project_id == project_id,
                    PullRequestRecord.branch_name == branch_name,
                    PullRequestRecord.state == "open",
                )
            )
            pr_title = (
                request.title if request and request.title else patch_record.summary
            )
            pr_result = self._open_pull_request_on_github(
                session=session,
                project=project_record,
                source=source,
                patch=patch_record,
                branch_name=branch_name,
                title=pr_title,
            )
            if existing is None:
                pr_record = PullRequestRecord(
                    id=f"pr-{uuid4().hex[:12]}",
                    project_id=project_id,
                    patch_id=patch_id,
                    branch_name=pr_result.branch_name,
                    title=pr_result.title,
                    url=pr_result.url,
                    state=pr_result.state,
                )
                session.add(pr_record)
            else:
                # Refresh the already-open PR row with the latest result.
                existing.patch_id = patch_id
                existing.branch_name = pr_result.branch_name
                existing.title = pr_result.title
                existing.url = pr_result.url
                existing.state = pr_result.state
                pr_record = existing
            patch_record.status = "pr_opened"
            # Store approved patch for RAG style learning (graceful — never blocks)
            try:
                rag = get_rag()
                if rag.available:
                    rag.store_approved_patch(
                        patch_id=patch_id,
                        artifact_id=patch_record.artifact_key,
                        project_id=project_id,
                        section_titles=patch_record.proposed_sections or [],
                        patch_content=patch_record.preview_markdown or "",
                    )
            except Exception:  # noqa: BLE001
                pass  # RAG storage is optional — never block the PR flow
            self._record_activity(
                session,
                project_record.workspace_id,
                project_record.id,
                "pull_request.opened",
                f"Review PR prepared for {project_record.name}",
                pr_title,
                {
                    "branch_name": pr_result.branch_name,
                    "url": pr_result.url,
                    "patch_id": patch_id,
                    "number": pr_result.number,
                },
            )
            session.flush()
            return self._pull_request_model(pr_record)
899 def list_pull_requests(
900 self, project_id: str, user_id: str | None = None
901 ) -> list[PullRequestModel]:
902 with session_scope() as session:
903 self._require_project_access(session, project_id, user_id)
904 prs = session.scalars(
905 select(PullRequestRecord)
906 .where(PullRequestRecord.project_id == project_id)
907 .order_by(desc(PullRequestRecord.created_at))
908 ).all()
909 return [self._pull_request_model(item) for item in prs]
    def publish_preview(
        self, project_id: str, user_id: str | None = None
    ) -> PublishDeployment:
        """Publish the project's doc pages and record a deployment.

        Publishes all pages, records an activity event, triggers
        revalidation, and then best-effort generates agent files
        (CLAUDE.md, AGENTS.md, llms.txt, llms-full.txt) under
        ``.mint/<project-slug>``; agent-file failures never fail the publish.

        Raises:
            KeyError: Unknown project, or its workspace row is missing.
        """
        self._refresh_repo_state()
        with session_scope() as session:
            project_record = session.get(ProjectRecord, project_id)
            if project_record is None:
                raise KeyError(f"Unknown project id: {project_id}")
            self._require_project_access(session, project_id, user_id)
            workspace = session.get(WorkspaceRecord, project_record.workspace_id)
            if workspace is None:
                raise KeyError(f"Workspace missing for project {project_id}")
            deployment_record = PublishDeploymentRecord(
                id=f"preview-{uuid4().hex[:12]}",
                project_id=project_id,
                status=JobStatus.COMPLETED.value,
                commit_ref=self._head_ref,
                site_url=self._public_docs_base_url(
                    workspace.slug, project_record.slug
                ),
                preview_url=(
                    f"{settings.public_base_url.rstrip('/')}/app/projects/"
                    f"{project_record.id}?preview={self._head_ref}"
                ),
                # Updated below once pages have been published.
                docs_count=0,
                generated_at=_now(),
            )
            session.add(deployment_record)
            session.flush()

            docs_root = self._project_docs_root(session, project_record.id)
            published_pages = self._publish_pages_for_project(
                session,
                workspace.slug,
                project_record.slug,
                project_record.id,
                deployment_record.id,
                docs_root,
            )
            deployment_record.docs_count = len(published_pages)
            self._record_activity(
                session,
                workspace.id,
                project_record.id,
                "publish.completed",
                f"Published {len(published_pages)} pages for {project_record.name}",
                deployment_record.site_url,
                {
                    "deployment_id": deployment_record.id,
                    "site_url": deployment_record.site_url,
                    "commit_ref": deployment_record.commit_ref,
                },
            )
            session.flush()
            publish = self._publish_model(deployment_record)
            # Collect artifact definitions for agent file generation (inside session scope)
            artifact_defs_for_agents = list(
                session.scalars(
                    select(ArtifactDefinitionRecord).where(
                        ArtifactDefinitionRecord.project_id == project_id
                    )
                ).all()
            )
        self.revalidate_project(project_id, publish.id)
        self._run_frontend_revalidation_job(project_id, publish.id)
        # --- Agent file generation (CLAUDE.md, AGENTS.md, llms.txt, llms-full.txt) ---
        # Build lightweight MintDocument stubs from artifact definitions and write agent files.
        # TODO: Phase 2 — open a PR with these files instead of writing them locally.
        try:
            mint_docs: list[MintDocument] = []
            for rec in artifact_defs_for_agents:
                # Sentinel symbol entry linking generated files back to the
                # originating artifact definition.
                sentinel: dict[str, object] = {
                    "k": "__artifact__",
                    "n": rec.title,
                    "artifact_key": rec.artifact_key,
                    "artifact_type": rec.artifact_type,
                }
                mint_docs.append(
                    MintDocument(
                        source_files=list(rec.source_patterns),
                        symbols=[sentinel],
                        narrative=rec.summary,
                        drift_status="CLEAN",
                    )
                )
            if mint_docs:
                output_dir = self.repo_root / ".mint" / project_record.slug
                AgentFileGenerator().write_to_directory(
                    output_dir=output_dir,
                    project_name=project_record.name,
                    artifacts=mint_docs,
                    project_description=project_record.description or "",
                )
                # Persist generated file content back to the project record
                file_col_map = [
                    ("CLAUDE.md", "claude_md_content"),
                    ("AGENTS.md", "agents_md_content"),
                    ("llms.txt", "llms_txt_content"),
                    ("llms-full.txt", "llms_full_txt_content"),
                ]
                with session_scope() as _save_session:
                    _proj = _save_session.get(ProjectRecord, project_id)
                    if _proj is not None:
                        for filename, col in file_col_map:
                            filepath = output_dir / filename
                            if filepath.exists():
                                setattr(_proj, col, filepath.read_text(encoding="utf-8"))
                        _proj.last_scanned_at = datetime.now(tz=UTC)
        except Exception:
            # Agent file generation is best-effort and must never fail a publish
            pass
        return publish
1024 def list_publishes(
1025 self, project_id: str, user_id: str | None = None
1026 ) -> list[PublishDeployment]:
1027 with session_scope() as session:
1028 self._require_project_access(session, project_id, user_id)
1029 publishes = session.scalars(
1030 select(PublishDeploymentRecord)
1031 .where(PublishDeploymentRecord.project_id == project_id)
1032 .order_by(desc(PublishDeploymentRecord.generated_at))
1033 ).all()
1034 return [self._publish_model(item) for item in publishes]
1036 def get_publish(
1037 self,
1038 project_id: str,
1039 deployment_id: str,
1040 user_id: str | None = None,
1041 ) -> PublishDeployment:
1042 with session_scope() as session:
1043 self._require_project_access(session, project_id, user_id)
1044 deployment = session.get(PublishDeploymentRecord, deployment_id)
1045 if deployment is None or deployment.project_id != project_id:
1046 raise KeyError(f"Unknown deployment id: {deployment_id}")
1047 return self._publish_model(deployment)
1049 def list_activity(
1050 self, project_id: str, user_id: str | None = None
1051 ) -> list[ActivityEvent]:
1052 with session_scope() as session:
1053 self._require_project_access(session, project_id, user_id)
1054 events = session.scalars(
1055 select(ActivityEventRecord)
1056 .where(ActivityEventRecord.project_id == project_id)
1057 .order_by(desc(ActivityEventRecord.created_at))
1058 ).all()
1059 return [self._activity_model(item) for item in events]
1061 def create_job(
1062 self,
1063 *,
1064 job_kind: str,
1065 project_id: str | None = None,
1066 workspace_id: str | None = None,
1067 payload_json: dict[str, object] | None = None,
1068 user_id: str | None = None,
1069 ) -> QueuedJob:
1070 with session_scope() as session:
1071 resolved_workspace_id = workspace_id
1072 if project_id is not None:
1073 project = session.get(ProjectRecord, project_id)
1074 if project is None:
1075 raise KeyError(f"Unknown project id: {project_id}")
1076 self._require_project_access(session, project_id, user_id)
1077 resolved_workspace_id = project.workspace_id
1078 elif workspace_id is not None:
1079 self._require_workspace_access(session, workspace_id, user_id)
1081 record = BackgroundJobRecord(
1082 id=f"job-{uuid4().hex[:12]}",
1083 workspace_id=resolved_workspace_id,
1084 project_id=project_id,
1085 job_kind=job_kind,
1086 status=JobStatus.PENDING.value,
1087 payload_json=payload_json or {},
1088 )
1089 session.add(record)
1090 session.flush()
1091 return self._job_model(record)
1093 def get_job(self, job_id: str, user_id: str | None = None) -> QueuedJob:
1094 with session_scope() as session:
1095 record = session.get(BackgroundJobRecord, job_id)
1096 if record is None:
1097 raise KeyError(f"Unknown job id: {job_id}")
1098 if record.project_id is not None:
1099 self._require_project_access(session, record.project_id, user_id)
1100 elif record.workspace_id is not None:
1101 self._require_workspace_access(session, record.workspace_id, user_id)
1102 return self._job_model(record)
1104 def list_jobs(
1105 self, project_id: str, user_id: str | None = None
1106 ) -> list[QueuedJob]:
1107 with session_scope() as session:
1108 self._require_project_access(session, project_id, user_id)
1109 records = session.scalars(
1110 select(BackgroundJobRecord)
1111 .where(BackgroundJobRecord.project_id == project_id)
1112 .order_by(desc(BackgroundJobRecord.created_at))
1113 ).all()
1114 return [self._job_model(record) for record in records]
1116 def mark_job_running(self, job_id: str) -> QueuedJob:
1117 with session_scope() as session:
1118 record = session.get(BackgroundJobRecord, job_id)
1119 if record is None:
1120 raise KeyError(f"Unknown job id: {job_id}")
1121 record.status = JobStatus.RUNNING.value
1122 record.attempt_count += 1
1123 record.started_at = _now()
1124 session.flush()
1125 return self._job_model(record)
1127 def mark_job_completed(
1128 self,
1129 job_id: str,
1130 *,
1131 resource_type: str | None = None,
1132 resource_id: str | None = None,
1133 result_summary: str | None = None,
1134 result_json: dict[str, object] | None = None,
1135 ) -> QueuedJob:
1136 with session_scope() as session:
1137 record = session.get(BackgroundJobRecord, job_id)
1138 if record is None:
1139 raise KeyError(f"Unknown job id: {job_id}")
1140 record.status = JobStatus.COMPLETED.value
1141 if record.started_at is None:
1142 record.started_at = _now()
1143 record.attempt_count = max(1, record.attempt_count)
1144 record.completed_at = _now()
1145 record.error_summary = None
1146 record.resource_type = resource_type
1147 record.resource_id = resource_id
1148 record.result_summary = result_summary
1149 record.result_json = result_json
1150 session.flush()
1151 return self._job_model(record)
1153 def mark_job_failed(
1154 self,
1155 job_id: str,
1156 *,
1157 error_summary: str,
1158 result_json: dict[str, object] | None = None,
1159 ) -> QueuedJob:
1160 with session_scope() as session:
1161 record = session.get(BackgroundJobRecord, job_id)
1162 if record is None:
1163 raise KeyError(f"Unknown job id: {job_id}")
1164 record.status = JobStatus.FAILED.value
1165 if record.started_at is None:
1166 record.started_at = _now()
1167 record.attempt_count = max(1, record.attempt_count)
1168 record.completed_at = _now()
1169 record.error_summary = error_summary[:500]
1170 record.result_json = result_json
1171 if record.workspace_id is not None:
1172 self._record_activity(
1173 session,
1174 record.workspace_id,
1175 record.project_id,
1176 f"job.{record.job_kind}.failed",
1177 f"{record.job_kind.replace('_', ' ').title()} failed",
1178 record.error_summary,
1179 {"job_id": record.id},
1180 )
1181 session.flush()
1182 return self._job_model(record)
1184 def create_api_token(
1185 self,
1186 request: TokenCreateRequest,
1187 user_id: str | None = None,
1188 ) -> ApiTokenSummary:
1189 effective_user_id = user_id or settings.default_user_id
1190 raw_token = f"docu_{secrets.token_urlsafe(24)}"
1191 token_record = ApiTokenRecord(
1192 id=f"token-{uuid4().hex[:12]}",
1193 workspace_id=request.workspace_id,
1194 user_id=effective_user_id,
1195 label=request.label,
1196 token_prefix=raw_token[:12],
1197 token_hash=_token_hash(raw_token),
1198 scopes=request.scopes,
1199 )
1200 with session_scope() as session:
1201 self._require_workspace_access(session, request.workspace_id, user_id)
1202 session.add(token_record)
1203 self._record_activity(
1204 session,
1205 request.workspace_id,
1206 None,
1207 "token.created",
1208 f"API token {request.label} created",
1209 "Scoped API token generated for CLI or MCP access.",
1210 {"scopes": request.scopes},
1211 )
1212 session.flush()
1213 return self._api_token_model(token_record, raw_token)
1215 def list_api_tokens(
1216 self, workspace_id: str, user_id: str | None = None
1217 ) -> list[ApiTokenSummary]:
1218 with session_scope() as session:
1219 self._require_workspace_access(session, workspace_id, user_id)
1220 tokens = session.scalars(
1221 select(ApiTokenRecord)
1222 .where(ApiTokenRecord.workspace_id == workspace_id)
1223 .order_by(desc(ApiTokenRecord.created_at))
1224 ).all()
1225 return [self._api_token_model(item) for item in tokens]
1227 def exchange_cli_token(
1228 self, request: CLIExchangeRequest, user_id: str | None = None
1229 ) -> ApiTokenSummary:
1230 return self.create_api_token(
1231 TokenCreateRequest(
1232 workspace_id=request.workspace_id,
1233 label=request.label,
1234 scopes=request.scopes,
1235 ),
1236 user_id=user_id,
1237 )
1239 def revoke_api_token(
1240 self, workspace_id: str, token_id: str, user_id: str | None = None
1241 ) -> ApiTokenSummary:
1242 with session_scope() as session:
1243 self._require_workspace_access(session, workspace_id, user_id)
1244 record = session.scalar(
1245 select(ApiTokenRecord).where(
1246 ApiTokenRecord.id == token_id,
1247 ApiTokenRecord.workspace_id == workspace_id,
1248 )
1249 )
1250 if record is None:
1251 raise ValueError(f"Token {token_id} not found in workspace {workspace_id}")
1252 record.revoked_at = _now()
1253 session.flush()
1254 return self._api_token_model(record)
1256 def authenticate_api_token(self, raw_token: str) -> ApiTokenSummary | None:
1257 with session_scope() as session:
1258 record = session.scalar(
1259 select(ApiTokenRecord).where(
1260 ApiTokenRecord.token_hash == _token_hash(raw_token),
1261 ApiTokenRecord.revoked_at.is_(None),
1262 )
1263 )
1264 if record is None:
1265 return None
1266 record.last_used_at = _now()
1267 session.flush()
1268 return self._api_token_model(record)
    def get_artifact_trace(
        self,
        artifact_id: str,
        project_id: str | None = None,
        user_id: str | None = None,
    ) -> ArtifactTrace:
        """Return the trace for an artifact key within a project.

        Falls back to ``settings.project_id`` when no project is given, and
        ensures bootstrap defaults, repo state, and the project's artifact
        records exist before querying.

        Raises:
            KeyError: when no trace matches *artifact_id*, or the matching
                trace's artifact definition row is missing.
        """
        target_project_id = project_id or settings.project_id
        # Make sure default records and the repo snapshot exist before we query.
        self._ensure_bootstrap_defaults()
        self._refresh_repo_state()
        with session_scope() as session:
            self._require_project_access(session, target_project_id, user_id)
            settings_record = session.get(
                ProjectSettingsRecord, f"settings-{target_project_id}"
            )
            # Materialize artifact definitions/traces so the lookup below can hit.
            self._ensure_project_artifacts(session, target_project_id, settings_record)
            # artifact_id is the human-facing artifact_key, so join through the
            # definition table to find the trace row.
            trace_record = session.scalar(
                select(ArtifactTraceRecord)
                .join(
                    ArtifactDefinitionRecord,
                    ArtifactTraceRecord.artifact_definition_id
                    == ArtifactDefinitionRecord.id,
                )
                .where(ArtifactTraceRecord.project_id == target_project_id)
                .where(ArtifactDefinitionRecord.artifact_key == artifact_id)
            )
            if trace_record is None:
                raise KeyError(f"Unknown artifact id: {artifact_id}")
            definition = session.get(
                ArtifactDefinitionRecord, trace_record.artifact_definition_id
            )
            if definition is None:
                raise KeyError(f"Artifact definition missing for {artifact_id}")
            return self._trace_model(trace_record, definition)
1304 def explain_trace(
1305 self,
1306 artifact_id: str,
1307 project_id: str | None = None,
1308 user_id: str | None = None,
1309 ) -> dict[str, object]:
1310 artifact = self.get_artifact_trace(
1311 artifact_id, project_id=project_id, user_id=user_id
1312 )
1313 return {
1314 "artifact": artifact.model_dump(mode="json"),
1315 "explanation": (
1316 f"{artifact.title} is driven by {len(artifact.source_paths)} source paths "
1317 f"and publishes to {', '.join(artifact.doc_paths)}."
1318 ),
1319 }
1321 def list_public_pages(self, project_id: str) -> list[PublishedPage]:
1322 with session_scope() as session:
1323 pages = session.scalars(
1324 select(PublishedPageRecord)
1325 .where(PublishedPageRecord.project_id == project_id)
1326 .order_by(PublishedPageRecord.path.asc())
1327 ).all()
1328 return [self._published_page_model(item) for item in pages]
1330 def list_installations(
1331 self,
1332 workspace_id: str,
1333 user_id: str | None = None,
1334 ) -> list[GitHubInstallation]:
1335 with session_scope() as session:
1336 self._require_workspace_access(session, workspace_id, user_id)
1337 records = session.scalars(
1338 select(GitHubInstallationRecord)
1339 .where(GitHubInstallationRecord.workspace_id == workspace_id)
1340 .order_by(GitHubInstallationRecord.created_at.asc())
1341 ).all()
1342 return [self._installation_model(item) for item in records]
1344 def list_installation_repositories(
1345 self,
1346 installation_id: str,
1347 user_id: str | None = None,
1348 ) -> list[GitHubRepository]:
1349 with session_scope() as session:
1350 installation = session.get(GitHubInstallationRecord, installation_id)
1351 if installation is None:
1352 raise KeyError(f"Unknown installation id: {installation_id}")
1353 self._require_workspace_access(session, installation.workspace_id, user_id)
1354 records = session.scalars(
1355 select(GitHubRepositoryRecord)
1356 .where(GitHubRepositoryRecord.github_installation_id == installation.id)
1357 .order_by(GitHubRepositoryRecord.full_name.asc())
1358 ).all()
1359 return [self._github_repository_model(item) for item in records]
1361 def get_installation_workspace_id(self, installation_id: str) -> str | None:
1362 with session_scope() as session:
1363 installation = session.get(GitHubInstallationRecord, installation_id)
1364 return installation.workspace_id if installation is not None else None
    def sync_installation(
        self,
        installation_id: str,
        user_id: str | None = None,
    ) -> list[GitHubRepository]:
        """Refresh the cached repository list for a GitHub installation.

        Uses two session scopes so the network call to GitHub happens without
        holding a database session open. The cache is replaced wholesale:
        existing repository rows are deleted, then recreated from the API
        payload.

        Raises:
            KeyError: unknown installation, or no external installation_id to
                call GitHub with.
        """
        # Phase 1: resolve the external installation id under an access check.
        with session_scope() as session:
            installation = session.get(GitHubInstallationRecord, installation_id)
            if installation is None:
                raise KeyError(f"Unknown installation id: {installation_id}")
            self._require_workspace_access(session, installation.workspace_id, user_id)
            external_installation_id = installation.installation_id
            # The self-bootstrap installation may borrow the configured id.
            if (
                external_installation_id is None
                and installation.id == "github-install-documint"
                and settings.self_bootstrap_installation_id
            ):
                external_installation_id = settings.self_bootstrap_installation_id
            if external_installation_id is None:
                raise KeyError(
                    f"GitHub installation {installation_id} does not have an external installation_id yet"
                )
        # Network call happens between the two session scopes.
        repositories = fetch_installation_repositories(external_installation_id)
        # Phase 2: replace the cached repository rows.
        with session_scope() as session:
            installation = session.get(GitHubInstallationRecord, installation_id)
            if installation is None:
                # Installation could have been deleted while we were fetching.
                raise KeyError(f"Unknown installation id: {installation_id}")
            session.execute(
                delete(GitHubRepositoryRecord).where(
                    GitHubRepositoryRecord.github_installation_id == installation.id
                )
            )
            synced_records: list[GitHubRepositoryRecord] = []
            for repository in repositories:
                full_name = repository.get("full_name")
                owner_payload = repository.get("owner")
                owner_login = (
                    owner_payload.get("login")
                    if isinstance(owner_payload, dict)
                    else None
                )
                name = repository.get("name")
                # Skip payload entries missing any of the identifying strings.
                if not all(
                    isinstance(item, str) and item
                    for item in (full_name, owner_login, name)
                ):
                    continue
                full_name_str = cast(str, full_name)
                owner_login_str = cast(str, owner_login)
                name_str = cast(str, name)
                record = GitHubRepositoryRecord(
                    id=f"ghrepo-{uuid4().hex[:12]}",
                    github_installation_id=installation.id,
                    repository_id=(
                        str(repository.get("id")) if repository.get("id") else None
                    ),
                    # full_name is normalized to lowercase for case-insensitive lookup.
                    full_name=full_name_str.lower(),
                    owner=owner_login_str,
                    name=name_str,
                    default_branch=str(repository.get("default_branch") or "main"),
                    visibility=str(
                        repository.get("visibility")
                        or ("private" if repository.get("private") else "public")
                    ),
                    is_private=bool(repository.get("private")),
                    is_archived=bool(repository.get("archived")),
                    metadata_json={
                        "html_url": repository.get("html_url"),
                        "permissions": repository.get("permissions"),
                    },
                )
                session.add(record)
                synced_records.append(record)
            session.flush()
            synced = [self._github_repository_model(record) for record in synced_records]
            # Backfill account_login from metadata when it was never set.
            installation.account_login = (
                installation.account_login
                or str(installation.metadata_json.get("account_login") or "")
                or None
            )
            installation.metadata_json = {
                **installation.metadata_json,
                "synced_at": _now().isoformat(),
                "repository_count": len(synced),
            }
            self._record_activity(
                session,
                installation.workspace_id,
                None,
                "github.installation.synced",
                f"Synced installation {installation.id}",
                f"{len(synced)} repositories available for onboarding.",
                {"installation_id": installation.id, "repository_count": len(synced)},
            )
            session.flush()
            return synced
    def upsert_github_installation(
        self,
        *,
        external_installation_id: str,
        account_login: str | None,
        account_type: str | None,
        repository_selection: str,
        repositories: list[dict[str, Any]] | None = None,
        workspace_id: str | None = None,
    ) -> GitHubInstallation:
        """Create or update an installation record from a GitHub event payload.

        Matching is by the external installation id. In the default workspace
        the bootstrap record ``github-install-documint`` is reused or merged
        into so the self-dogfooding installation never gets duplicated. When
        *repositories* is provided, the cached repository rows are replaced
        wholesale; passing ``None`` leaves the existing cache untouched.
        """
        target_workspace_id = workspace_id or settings.default_workspace_id
        with session_scope() as session:
            installation = session.scalar(
                select(GitHubInstallationRecord).where(
                    GitHubInstallationRecord.installation_id == external_installation_id
                )
            )
            if installation is None:
                # No record for this external id yet — in the default
                # workspace, try to claim the bootstrap record first.
                bootstrap_installation = (
                    session.get(GitHubInstallationRecord, "github-install-documint")
                    if target_workspace_id == settings.default_workspace_id
                    else None
                )
                if bootstrap_installation is not None and (
                    bootstrap_installation.installation_id is None
                    or bootstrap_installation.installation_id == external_installation_id
                ):
                    installation = bootstrap_installation
                else:
                    installation = GitHubInstallationRecord(
                        id=f"ghinst-{uuid4().hex[:12]}",
                        workspace_id=target_workspace_id,
                        installation_id=external_installation_id,
                    )
                    session.add(installation)
            elif (
                target_workspace_id == settings.default_workspace_id
                and installation.id != "github-install-documint"
            ):
                # A duplicate exists alongside the bootstrap record: fold the
                # duplicate into the bootstrap one and continue with the result.
                bootstrap_installation = session.get(
                    GitHubInstallationRecord, "github-install-documint"
                )
                if bootstrap_installation is not None:
                    installation = self._merge_installation_records(
                        session,
                        target=bootstrap_installation,
                        source=installation,
                    )
            # Refresh scalar fields from the event payload unconditionally.
            installation.workspace_id = target_workspace_id
            installation.installation_id = external_installation_id
            installation.account_login = account_login
            installation.account_type = account_type
            installation.repository_selection = repository_selection
            installation.metadata_json = {
                **(installation.metadata_json or {}),
                "account_login": account_login,
                "account_type": account_type,
            }
            session.flush()
            if repositories is not None:
                # Replace the repository cache with the payload's contents.
                session.execute(
                    delete(GitHubRepositoryRecord).where(
                        GitHubRepositoryRecord.github_installation_id == installation.id
                    )
                )
                for repository in repositories:
                    full_name = repository.get("full_name")
                    owner_payload = repository.get("owner")
                    owner_login = (
                        owner_payload.get("login")
                        if isinstance(owner_payload, dict)
                        else None
                    )
                    name = repository.get("name")
                    # Skip entries missing any identifying string.
                    if not all(
                        isinstance(item, str) and item
                        for item in (full_name, owner_login, name)
                    ):
                        continue
                    full_name_str = cast(str, full_name)
                    owner_login_str = cast(str, owner_login)
                    name_str = cast(str, name)
                    session.add(
                        GitHubRepositoryRecord(
                            id=f"ghrepo-{uuid4().hex[:12]}",
                            github_installation_id=installation.id,
                            repository_id=(
                                str(repository.get("id"))
                                if repository.get("id")
                                else None
                            ),
                            # Lowercased for case-insensitive lookup elsewhere.
                            full_name=full_name_str.lower(),
                            owner=owner_login_str,
                            name=name_str,
                            default_branch=str(
                                repository.get("default_branch") or "main"
                            ),
                            visibility=str(
                                repository.get("visibility")
                                or (
                                    "private" if repository.get("private") else "public"
                                )
                            ),
                            is_private=bool(repository.get("private")),
                            is_archived=bool(repository.get("archived")),
                            metadata_json={"html_url": repository.get("html_url")},
                        )
                    )
            return self._installation_model(installation)
    def ensure_clerk_user(self, claims: dict[str, Any]) -> User:
        """Create or refresh the local user row for a Clerk JWT's claims.

        The display name prefers the ``name`` claim, then a join of
        ``first_name``/``last_name``, then the literal fallback
        "Documint Operator". First-time users are attached to a workspace via
        ``_assign_first_operator_workspace``.

        Raises:
            KeyError: when the token has no usable ``sub`` claim.
        """
        external_id = claims.get("sub")
        if not isinstance(external_id, str) or not external_id:
            raise KeyError("Clerk token missing subject")
        email = claims.get("email")
        first_name = claims.get("first_name")
        last_name = claims.get("last_name")
        full_name = claims.get("name")
        # Precedence: full "name" claim > "first last" join > hardcoded fallback.
        # Note the `or` binds to the else-branch join only.
        resolved_name = (
            full_name
            if isinstance(full_name, str) and full_name.strip()
            else " ".join(
                item.strip()
                for item in (first_name, last_name)
                if isinstance(item, str) and item.strip()
            ).strip()
            or "Documint Operator"
        )
        with session_scope() as session:
            user = session.scalar(
                select(UserRecord).where(UserRecord.external_id == external_id)
            )
            if user is None:
                user = UserRecord(
                    id=f"user-{uuid4().hex[:12]}",
                    external_id=external_id,
                    email=email if isinstance(email, str) else None,
                    name=resolved_name,
                    provider="clerk",
                )
                session.add(user)
                session.flush()
                # New users get seeded into their first workspace.
                self._assign_first_operator_workspace(session, user.id)
            else:
                # Keep the stored email when the token omits a string email.
                user.email = email if isinstance(email, str) else user.email
                user.name = resolved_name
                user.provider = "clerk"
            session.flush()
            return self._user_model(user)
1612 def resolve_project_id_for_repository(
1613 self, repository_full_name: str
1614 ) -> str | None:
1615 normalized = repository_full_name.strip().lower()
1616 if not normalized or "/" not in normalized:
1617 return None
1618 owner, repo = normalized.split("/", 1)
1619 with session_scope() as session:
1620 source = session.scalar(
1621 select(RepositorySourceRecord).where(
1622 RepositorySourceRecord.owner == owner,
1623 RepositorySourceRecord.repo == repo,
1624 )
1625 )
1626 return source.project_id if source is not None else None
1628 def get_public_doc_page(
1629 self,
1630 workspace_slug: str,
1631 project_slug: str,
1632 page_path: str,
1633 ) -> PublicDocPage:
1634 normalized_path = page_path.strip("/") or "index"
1635 with session_scope() as session:
1636 page = session.scalar(
1637 select(PublishedPageRecord)
1638 .where(PublishedPageRecord.workspace_slug == workspace_slug)
1639 .where(PublishedPageRecord.project_slug == project_slug)
1640 .where(PublishedPageRecord.path == normalized_path)
1641 )
1642 if page is None:
1643 raise KeyError(
1644 f"Unknown published page: {workspace_slug}/{project_slug}/{normalized_path}"
1645 )
1646 return PublicDocPage(
1647 workspace_slug=workspace_slug,
1648 project_slug=project_slug,
1649 path=page.path,
1650 title=page.title,
1651 description=page.description,
1652 content_markdown=page.content_markdown,
1653 source_path=page.source_path,
1654 deployment_id=page.deployment_id,
1655 )
    def record_webhook_delivery(
        self,
        *,
        delivery_id: str | None,
        event_name: str,
        repository: str | None,
        action: str | None,
        ref: str | None,
        payload: dict[str, object],
        status: str,
    ) -> None:
        """Upsert a GitHub webhook delivery record keyed by its delivery id.

        A ``None`` delivery id always inserts a fresh row (no dedup key to
        match on). Existing rows are updated in place, which makes redelivery
        idempotent.
        """
        with session_scope() as session:
            existing = (
                session.scalar(
                    select(GitHubWebhookDeliveryRecord).where(
                        GitHubWebhookDeliveryRecord.delivery_id == delivery_id
                    )
                )
                if delivery_id
                else None
            )
            if existing is None:
                existing = GitHubWebhookDeliveryRecord(
                    id=f"ghd-{uuid4().hex[:12]}",
                    delivery_id=delivery_id,
                    event_name=event_name,
                    repository=repository,
                    action=action,
                    ref=ref,
                    payload=payload,
                    status=status,
                    # "received" rows are not yet processed.
                    processed_at=_now() if status != "received" else None,
                )
                session.add(existing)
            else:
                existing.status = status
                existing.event_name = event_name
                existing.repository = repository
                existing.action = action
                existing.ref = ref
                existing.payload = payload
                # NOTE(review): unlike the insert branch, this stamps
                # processed_at even when status == "received" — confirm
                # whether re-received deliveries should really look processed.
                existing.processed_at = _now()
1700 def revalidate_project(
1701 self, project_id: str, deployment_id: str | None = None
1702 ) -> dict[str, object]:
1703 with session_scope() as session:
1704 project = session.get(ProjectRecord, project_id)
1705 if project is None:
1706 raise KeyError(f"Unknown project id: {project_id}")
1707 payload: dict[str, object] = {
1708 "project_id": project_id,
1709 "deployment_id": deployment_id,
1710 "status": "accepted",
1711 "revalidated_at": _now().isoformat(),
1712 "route": f"/p/{self._workspace_slug(session, project.workspace_id)}/{project.slug}/docs",
1713 }
1714 self._record_activity(
1715 session,
1716 project.workspace_id,
1717 project_id,
1718 "publish.revalidated",
1719 f"Revalidation requested for {project.name}",
1720 str(payload["route"]),
1721 payload,
1722 )
1723 return payload
    def _bootstrap_defaults(self) -> None:
        """Seed the database with the self-dogfooding defaults.

        Idempotently creates (or refreshes) the default user, workspace,
        workspace membership, the bootstrap GitHub installation, the bootstrap
        project, its repository source, a cached repository row, and the
        project settings — then materializes artifact definitions from the
        repo config. Safe to run on every startup.
        """
        self._refresh_repo_state()
        config = self._load_repo_config()
        with session_scope() as session:
            # Default user.
            if session.get(UserRecord, settings.default_user_id) is None:
                session.add(
                    UserRecord(
                        id=settings.default_user_id,
                        email=settings.default_user_email,
                        name=settings.default_user_name,
                        provider="internal",
                    )
                )
            # Default workspace.
            workspace = session.get(WorkspaceRecord, settings.default_workspace_id)
            if workspace is None:
                workspace = WorkspaceRecord(
                    id=settings.default_workspace_id,
                    slug=settings.default_workspace_slug,
                    name=settings.default_workspace_name,
                    description="Bootstrap workspace used to dogfood Documint against itself.",
                )
                session.add(workspace)
            # Owner membership linking default user to the workspace.
            membership = session.scalar(
                select(WorkspaceMemberRecord).where(
                    WorkspaceMemberRecord.workspace_id == workspace.id,
                    WorkspaceMemberRecord.user_id == settings.default_user_id,
                )
            )
            if membership is None:
                session.add(
                    WorkspaceMemberRecord(
                        id=f"wm-{uuid4().hex[:12]}",
                        workspace_id=workspace.id,
                        user_id=settings.default_user_id,
                        role="owner",
                    )
                )
            # Bootstrap GitHub installation, deduplicating against any record
            # that already carries the configured external installation id.
            installation = session.get(
                GitHubInstallationRecord, "github-install-documint"
            )
            duplicate_installation = (
                session.scalar(
                    select(GitHubInstallationRecord).where(
                        GitHubInstallationRecord.installation_id
                        == settings.self_bootstrap_installation_id
                    )
                )
                if settings.self_bootstrap_installation_id
                else None
            )
            if installation is None:
                installation = GitHubInstallationRecord(
                    id="github-install-documint",
                    workspace_id=workspace.id,
                    installation_id=settings.self_bootstrap_installation_id,
                    account_login=settings.project_owner,
                    account_type="User",
                    repository_selection="selected",
                    metadata_json={
                        "repository": f"{settings.project_owner}/{settings.project_repo}"
                    },
                )
                session.add(installation)
                session.flush()
                if (
                    duplicate_installation is not None
                    and duplicate_installation.id != installation.id
                ):
                    # Fold a pre-existing duplicate into the bootstrap record.
                    installation = self._merge_installation_records(
                        session,
                        target=installation,
                        source=duplicate_installation,
                    )
            else:
                # NOTE(review): this branch duplicates the unconditional field
                # refresh just below — looks redundant; confirm before cleanup.
                installation.workspace_id = workspace.id
                installation.account_login = settings.project_owner
                installation.account_type = "User"
                installation.repository_selection = "selected"
                installation.metadata_json = {
                    **(installation.metadata_json or {}),
                    "repository": f"{settings.project_owner}/{settings.project_repo}",
                }
            # Unconditional refresh of the bootstrap installation's fields.
            installation.installation_id = settings.self_bootstrap_installation_id
            installation.workspace_id = workspace.id
            installation.account_login = settings.project_owner
            installation.account_type = "User"
            installation.repository_selection = "selected"
            installation.metadata_json = {
                **(installation.metadata_json or {}),
                "repository": f"{settings.project_owner}/{settings.project_repo}",
            }
            # Bootstrap project.
            project = session.get(ProjectRecord, settings.project_id)
            if project is None:
                project = ProjectRecord(
                    id=settings.project_id,
                    workspace_id=workspace.id,
                    github_installation_id="github-install-documint",
                    name=config["project"]["name"],
                    slug=config["project"]["slug"],
                    description=config["project"]["description"],
                    public_url=self._public_docs_base_url(
                        workspace.slug, config["project"]["slug"]
                    ),
                    dashboard_url=f"{settings.public_base_url.rstrip('/')}/app/projects/{settings.project_id}",
                    onboarding_status="connected",
                )
                session.add(project)
            # Repository source record pointing at the local checkout.
            source = session.get(
                RepositorySourceRecord, f"source-{settings.project_id}"
            )
            docs_root = str(config["docs"]["root"])
            if source is None:
                session.add(
                    RepositorySourceRecord(
                        id=f"source-{settings.project_id}",
                        project_id=settings.project_id,
                        provider="github",
                        owner=settings.project_owner,
                        repo=settings.project_repo,
                        default_branch=settings.default_branch,
                        local_path=str(self.repo_root),
                        current_ref=self._head_ref,
                        docs_root=docs_root,
                        installation_id="github-install-documint",
                    )
                )
            else:
                # Keep ref/paths in sync with the current checkout.
                source.current_ref = self._head_ref
                source.docs_root = docs_root
                source.local_path = str(self.repo_root)
            # Cached repository row for the bootstrap installation.
            repo_cache = session.scalar(
                select(GitHubRepositoryRecord).where(
                    GitHubRepositoryRecord.github_installation_id
                    == "github-install-documint",
                    GitHubRepositoryRecord.full_name
                    == f"{settings.project_owner}/{settings.project_repo}".lower(),
                )
            )
            if repo_cache is None:
                session.add(
                    GitHubRepositoryRecord(
                        id=f"ghrepo-{uuid4().hex[:12]}",
                        github_installation_id="github-install-documint",
                        repository_id=None,
                        full_name=f"{settings.project_owner}/{settings.project_repo}".lower(),
                        owner=settings.project_owner,
                        name=settings.project_repo,
                        default_branch=settings.default_branch,
                        visibility="public",
                        is_private=False,
                        is_archived=False,
                        metadata_json={
                            "bootstrap": True,
                            "html_url": f"https://github.com/{settings.project_owner}/{settings.project_repo}",
                        },
                    )
                )
            # Project settings derived from the repo config file.
            settings_record = session.get(
                ProjectSettingsRecord, f"settings-{settings.project_id}"
            )
            if settings_record is None:
                # NOTE(review): the new record is added but settings_record
                # stays None, so _ensure_project_artifacts below receives None
                # on first bootstrap — presumably it tolerates that; confirm.
                session.add(
                    ProjectSettingsRecord(
                        id=f"settings-{settings.project_id}",
                        project_id=settings.project_id,
                        docs_root=docs_root,
                        config_version=int(config["version"]),
                        config_json=config,
                        ai_policy=dict(config["ai"]),
                        publish_behavior=dict(config["publish"]),
                        pr_behavior=dict(config["pull_requests"]),
                    )
                )
            else:
                settings_record.docs_root = docs_root
                settings_record.config_version = int(config["version"])
                settings_record.config_json = config
                settings_record.ai_policy = dict(config["ai"])
                settings_record.publish_behavior = dict(config["publish"])
                settings_record.pr_behavior = dict(config["pull_requests"])
            self._upsert_artifact_definitions(session, settings.project_id, config)
            self._ensure_project_artifacts(
                session, settings.project_id, settings_record
            )
    def _merge_installation_records(
        self,
        session: Session,
        *,
        target: GitHubInstallationRecord,
        source: GitHubInstallationRecord,
    ) -> GitHubInstallationRecord:
        """Fold a duplicate installation record into *target*, then delete it.

        Repoints projects, repository sources, and cached repositories from
        *source* to *target*; cached repositories that would collide on
        full_name are deleted rather than repointed. Target's own field values
        win over source's on conflict.

        Returns:
            The surviving *target* record (or *target* unchanged when the two
            records are already the same row).
        """
        if target.id == source.id:
            return target
        source_installation_id = source.installation_id
        # Clear source's external id first so a uniqueness constraint on
        # installation_id cannot trip during the merge.
        if source_installation_id and target.installation_id != source_installation_id:
            source.installation_id = None
        target.account_login = target.account_login or source.account_login
        target.account_type = target.account_type or source.account_type
        target.repository_selection = (
            target.repository_selection or source.repository_selection or "selected"
        )
        # Target's metadata keys win; source only fills gaps.
        target.metadata_json = {
            **(source.metadata_json or {}),
            **(target.metadata_json or {}),
        }
        session.flush()
        # Repoint dependent rows from source to target.
        for project in session.scalars(
            select(ProjectRecord).where(ProjectRecord.github_installation_id == source.id)
        ):
            project.github_installation_id = target.id
        for source_record in session.scalars(
            select(RepositorySourceRecord).where(
                RepositorySourceRecord.installation_id == source.id
            )
        ):
            source_record.installation_id = target.id
        # Move cached repositories, dropping any that duplicate a full_name
        # target already has.
        existing_full_names = set(
            session.scalars(
                select(GitHubRepositoryRecord.full_name).where(
                    GitHubRepositoryRecord.github_installation_id == target.id
                )
            )
        )
        for repository in session.scalars(
            select(GitHubRepositoryRecord).where(
                GitHubRepositoryRecord.github_installation_id == source.id
            )
        ):
            if repository.full_name in existing_full_names:
                session.delete(repository)
                continue
            repository.github_installation_id = target.id
            existing_full_names.add(repository.full_name)
        session.delete(source)
        session.flush()
        return target
1963 def _load_repo_config(self) -> dict[str, Any]:
1964 path = self.repo_root / settings.documint_config_filename
1965 if not path.exists() or yaml is None:
1966 return self._default_project_config()
1967 raw = yaml.safe_load(path.read_text(encoding="utf-8"))
1968 if not isinstance(raw, dict):
1969 return self._default_project_config()
1970 config = self._default_project_config()
1971 for key in ("version", "project", "docs", "publish", "pull_requests", "ai"):
1972 value = raw.get(key)
1973 if isinstance(value, dict):
1974 config[key].update(value)
1975 elif value is not None:
1976 config[key] = value
1977 artifacts = raw.get("artifacts")
1978 if isinstance(artifacts, list) and artifacts:
1979 config["artifacts"] = [
1980 artifact for artifact in artifacts if isinstance(artifact, dict)
1981 ]
1982 return config
1984 def _default_project_config(self) -> dict[str, Any]:
1985 return {
1986 "version": 1,
1987 "project": {
1988 "name": settings.project_name,
1989 "slug": settings.project_slug,
1990 "description": (
1991 "Verified, repo-native documentation operations for AI infra teams "
1992 "shipping APIs, SDKs, and MCP servers."
1993 ),
1994 },
1995 "docs": {"root": "content/docs"},
1996 "publish": {
1997 "public_subpath": True,
1998 "site_prefix": "/p",
1999 "revalidate_on_publish": True,
2000 },
2001 "pull_requests": {
2002 "branch_prefix": "documint",
2003 "strategy": "one_pr_per_artifact",
2004 "direct_default_branch_writes": False,
2005 },
2006 "ai": {
2007 "provider": "huggingface",
2008 "draft_policy": "on_demand",
2009 "review_policy": "human_required",
2010 },
2011 "artifacts": [
2012 {
2013 "id": spec.artifact_key,
2014 "slug": spec.slug,
2015 "title": spec.title,
2016 "type": spec.artifact_type.value,
2017 "summary": spec.summary,
2018 "doc_paths": list(spec.doc_paths),
2019 "source_patterns": list(spec.source_patterns),
2020 }
2021 for spec in DEFAULT_ARTIFACT_SPECS
2022 ],
2023 }
2025 def _artifact_specs_from_config(self, config: dict[str, Any]) -> list[ArtifactSpec]:
2026 raw_artifacts = config.get("artifacts")
2027 if not isinstance(raw_artifacts, list):
2028 raw_artifacts = []
2029 specs: list[ArtifactSpec] = []
2030 for item in raw_artifacts:
2031 if not isinstance(item, dict):
2032 continue
2033 artifact_type = item.get("type")
2034 artifact_id = item.get("id")
2035 slug = item.get("slug")
2036 title = item.get("title")
2037 summary = item.get("summary")
2038 doc_paths = item.get("doc_paths")
2039 source_patterns = item.get("source_patterns")
2040 source_paths = item.get("source_paths")
2041 if not all(
2042 isinstance(value, str)
2043 for value in (artifact_type, artifact_id, slug, title, summary)
2044 ):
2045 continue
2046 if not isinstance(doc_paths, list):
2047 continue
2048 try:
2049 artifact_key = cast(str, artifact_id)
2050 artifact_slug = cast(str, slug)
2051 artifact_title = cast(str, title)
2052 artifact_type_name = cast(str, artifact_type)
2053 artifact_summary = cast(str, summary)
2054 normalized_patterns: list[str] = []
2055 if isinstance(source_patterns, list):
2056 normalized_patterns.extend(
2057 pattern
2058 for pattern in source_patterns
2059 if isinstance(pattern, str) and pattern.strip()
2060 )
2061 if isinstance(source_paths, list):
2062 normalized_patterns.extend(
2063 path
2064 for path in source_paths
2065 if isinstance(path, str) and path.strip()
2066 )
2067 specs.append(
2068 ArtifactSpec(
2069 artifact_key=artifact_key,
2070 slug=artifact_slug,
2071 title=artifact_title,
2072 artifact_type=ArtifactType(artifact_type_name),
2073 summary=artifact_summary,
2074 doc_paths=tuple(
2075 path for path in doc_paths if isinstance(path, str)
2076 ),
2077 source_patterns=tuple(normalized_patterns),
2078 )
2079 )
2080 except ValueError:
2081 continue
2082 return specs or list(DEFAULT_ARTIFACT_SPECS)
2084 def _sync_project_runtime(self, session: Session, project: ProjectRecord) -> None:
2085 source = session.get(RepositorySourceRecord, f"source-{project.id}")
2086 if source is not None:
2087 source.current_ref = self._head_ref
2088 source.local_path = str(self.repo_root)
2090 def _upsert_artifact_definitions(
2091 self,
2092 session: Session,
2093 project_id: str,
2094 config: dict[str, object],
2095 ) -> None:
2096 specs = self._artifact_specs_from_config(config)
2097 existing = {
2098 item.artifact_key: item
2099 for item in session.scalars(
2100 select(ArtifactDefinitionRecord).where(
2101 ArtifactDefinitionRecord.project_id == project_id
2102 )
2103 ).all()
2104 }
2105 for spec in specs:
2106 record = existing.get(spec.artifact_key)
2107 record_id = f"{project_id}:{spec.artifact_key}"
2108 if record is None:
2109 session.add(
2110 ArtifactDefinitionRecord(
2111 id=record_id,
2112 project_id=project_id,
2113 artifact_key=spec.artifact_key,
2114 slug=spec.slug,
2115 title=spec.title,
2116 artifact_type=spec.artifact_type.value,
2117 summary=spec.summary,
2118 doc_paths=list(spec.doc_paths),
2119 source_patterns=list(spec.source_patterns),
2120 )
2121 )
2122 continue
2123 record.slug = spec.slug
2124 record.title = spec.title
2125 record.artifact_type = spec.artifact_type.value
2126 record.summary = spec.summary
2127 record.doc_paths = list(spec.doc_paths)
2128 record.source_patterns = list(spec.source_patterns)
    def _ensure_project_artifacts(
        self,
        session: Session,
        project_id: str,
        settings_record: ProjectSettingsRecord | None,
    ) -> None:
        """Sync artifact definitions and (re)build one trace row per definition.

        Uses the stored project config when a settings record exists, otherwise
        the config read from the repository checkout.
        """
        config = (
            settings_record.config_json
            if settings_record is not None
            else self._load_repo_config()
        )
        self._upsert_artifact_definitions(session, project_id, config)
        definitions = session.scalars(
            select(ArtifactDefinitionRecord).where(
                ArtifactDefinitionRecord.project_id == project_id
            )
        ).all()
        for definition in definitions:
            # Freshly computed trace (resolved paths + verification status).
            trace = self._build_artifact_trace(definition)
            record = session.get(
                ArtifactTraceRecord, f"{project_id}:{definition.artifact_key}"
            )
            if record is None:
                session.add(
                    ArtifactTraceRecord(
                        id=f"{project_id}:{definition.artifact_key}",
                        project_id=project_id,
                        artifact_definition_id=definition.id,
                        doc_paths=trace.doc_paths,
                        source_paths=trace.source_paths,
                        latest_source_revision=_revision_to_payload(
                            trace.latest_source_revision
                        ),
                        latest_doc_revision=_revision_to_payload(
                            trace.latest_doc_revision
                        ),
                        verification_status=trace.verification_status.value,
                    )
                )
                continue
            # Existing row: overwrite with the recomputed values.
            record.doc_paths = trace.doc_paths
            record.source_paths = trace.source_paths
            record.latest_source_revision = _revision_to_payload(
                trace.latest_source_revision
            )
            record.latest_doc_revision = _revision_to_payload(trace.latest_doc_revision)
            record.verification_status = trace.verification_status.value
2178 def _artifact_traces_for_project(
2179 self, session: Session, project_id: str
2180 ) -> list[ArtifactTrace]:
2181 definitions = {
2182 item.id: item
2183 for item in session.scalars(
2184 select(ArtifactDefinitionRecord).where(
2185 ArtifactDefinitionRecord.project_id == project_id
2186 )
2187 ).all()
2188 }
2189 traces = session.scalars(
2190 select(ArtifactTraceRecord)
2191 .where(ArtifactTraceRecord.project_id == project_id)
2192 .order_by(ArtifactTraceRecord.id.asc())
2193 ).all()
2194 return [
2195 self._trace_model(item, definitions[item.artifact_definition_id])
2196 for item in traces
2197 if item.artifact_definition_id in definitions
2198 ]
2200 def _build_artifact_trace(
2201 self, definition: ArtifactDefinitionRecord
2202 ) -> ArtifactTrace:
2203 source_paths = self._resolve_patterns(definition.source_patterns)
2204 doc_paths = [
2205 path for path in definition.doc_paths if (self.repo_root / path).exists()
2206 ]
2207 latest_source = self._latest_revision(source_paths)
2208 latest_doc = self._latest_revision(doc_paths)
2210 if not doc_paths:
2211 status = VerificationStatus.MISSING
2212 elif latest_source and (
2213 latest_doc is None or latest_source.touched_at > latest_doc.touched_at
2214 ):
2215 status = VerificationStatus.STALE
2216 else:
2217 status = VerificationStatus.VERIFIED
2219 return ArtifactTrace(
2220 id=definition.artifact_key,
2221 slug=definition.slug,
2222 title=definition.title,
2223 artifact_type=ArtifactType(definition.artifact_type),
2224 summary=definition.summary,
2225 doc_paths=doc_paths,
2226 source_paths=source_paths,
2227 latest_source_revision=latest_source,
2228 latest_doc_revision=latest_doc,
2229 verification_status=status,
2230 )
    def _compute_findings(
        self,
        session: Session,
        project_id: str,
        traces: list[ArtifactTrace],
    ) -> list[DriftFinding]:
        """Build drift findings for every trace in the project.

        Per trace, prefers semantic (symbol-diff) detection when source files
        are readable; otherwise falls back to timestamp-based comparison.
        Finally appends cross-artifact "cascade" findings derived from the
        semantic results. Symbol hashes/snapshots are persisted on the
        definition rows as a side effect.
        """
        findings: list[DriftFinding] = []
        now = _now()
        drift_engine = get_drift_engine()

        # Collect per-artifact drift results for cascade detection
        drift_results: dict[str, DriftResult] = {}
        # Load all definition records once so we can read/write symbol_hash/symbols_json
        definition_records: dict[str, ArtifactDefinitionRecord] = {
            rec.artifact_key: rec
            for rec in session.scalars(
                select(ArtifactDefinitionRecord).where(
                    ArtifactDefinitionRecord.project_id == project_id
                )
            ).all()
        }

        for trace in traces:
            # --- Semantic drift check via DriftEngine ---
            definition = definition_records.get(trace.id)
            if definition is not None and trace.source_paths:
                # Read source file contents for this artifact
                source_contents: dict[str, str] = {}
                for rel_path in trace.source_paths:
                    abs_path = self.repo_root / rel_path
                    if abs_path.is_file():
                        try:
                            source_contents[rel_path] = abs_path.read_text(
                                encoding="utf-8", errors="replace"
                            )
                        except OSError:
                            # Unreadable file — proceed without its content.
                            pass

                if source_contents:
                    drift_result = drift_engine.check(
                        artifact_key=trace.id,
                        source_contents=source_contents,
                        stored_hash=definition.symbol_hash,
                        stored_symbols_json=definition.symbols_json,
                    )
                    # Persist updated hash and symbols back to DB
                    definition.symbol_hash = drift_result.new_symbol_hash
                    definition.symbols_json = drift_engine.symbols_to_json(
                        drift_result.new_symbols
                    )
                    drift_results[trace.id] = drift_result

                    # --- Index symbols in the knowledge graph (best-effort) ---
                    try:
                        sgraph = get_symbol_graph()
                        sgraph.index_artifact(
                            artifact_id=trace.id,
                            project_id=project_id,
                            symbols=[
                                s.to_lsif_compact() for s in drift_result.new_symbols
                            ],
                        )
                    except Exception:
                        logger.warning(
                            "symbol_graph_index_failed",
                            artifact=trace.id,
                            exc_info=True,
                        )

                    if drift_result.is_stale:
                        severity = (
                            FindingSeverity.HIGH
                            if drift_result.diff.has_breaking_changes
                            else FindingSeverity.MEDIUM
                        )
                        findings.append(
                            DriftFinding(
                                id=f"finding-{project_id}-{trace.id}",
                                project_id=project_id,
                                artifact_id=trace.id,
                                artifact_type=trace.artifact_type,
                                severity=severity,
                                summary=drift_result.finding_summary(),
                                rationale=(
                                    "Structural symbol changes detected: exported symbols"
                                    " were added, removed, or modified."
                                ),
                                source_paths=trace.source_paths,
                                doc_paths=trace.doc_paths,
                                suggested_actions=[
                                    "Review the latest source diff and capture any interface changes.",
                                    "Update the target markdown page with fresh examples, links, and API details.",
                                    "Re-run drift and publish a preview before opening the docs PR.",
                                ],
                                source_revision=trace.latest_source_revision,
                                doc_revision=trace.latest_doc_revision,
                                status="open",
                                created_at=now,
                                updated_at=now,
                                changed_symbols=(
                                    [c.to_dict() for c in drift_result.diff.changes]
                                    if drift_result.diff
                                    else []
                                ),
                                symbol_hash=drift_result.new_symbol_hash,
                                confidence_score=(
                                    drift_result.diff.confidence_score()
                                    if drift_result.diff
                                    else 0.5
                                ),
                                has_breaking_changes=(
                                    drift_result.diff.has_breaking_changes
                                    if drift_result.diff
                                    else False
                                ),
                            )
                        )

                        # --- Cross-artifact impact via symbol graph (best-effort) ---
                        try:
                            changed_names = drift_result.diff.changed_symbol_names
                            if changed_names:
                                sgraph = get_symbol_graph()
                                affected = sgraph.find_affected_artifacts(
                                    changed_names, project_id
                                )
                                # Keep only OTHER artifacts (exclude the current one)
                                cross_refs = {
                                    aid: syms
                                    for aid, syms in affected.items()
                                    if aid != trace.id
                                }
                                if cross_refs:
                                    # Annotate the finding appended just above.
                                    findings[-1].cross_artifact_refs = cross_refs
                                    logger.info(
                                        "cross_artifact_drift_detected",
                                        source_artifact=trace.id,
                                        affected_artifacts=list(cross_refs.keys()),
                                        affected_symbols={
                                            aid: syms for aid, syms in cross_refs.items()
                                        },
                                    )
                        except Exception:
                            logger.warning(
                                "symbol_graph_cross_artifact_failed",
                                artifact=trace.id,
                                exc_info=True,
                            )

                    # Skip the timestamp-based check for this trace since we have semantic data
                    continue

            # --- Fallback: timestamp-based drift detection (no source files or no definition) ---
            if trace.verification_status == VerificationStatus.VERIFIED:
                continue
            severity = (
                FindingSeverity.HIGH
                if trace.verification_status == VerificationStatus.MISSING
                else FindingSeverity.MEDIUM
            )
            findings.append(
                DriftFinding(
                    id=f"finding-{project_id}-{trace.id}",
                    project_id=project_id,
                    artifact_id=trace.id,
                    artifact_type=trace.artifact_type,
                    severity=severity,
                    summary=f"{trace.title} needs refresh before the next publish.",
                    rationale=(
                        "Source files changed after the documentation artifact was last verified."
                        if trace.verification_status == VerificationStatus.STALE
                        else "The expected documentation page does not exist yet."
                    ),
                    source_paths=trace.source_paths,
                    doc_paths=trace.doc_paths,
                    suggested_actions=[
                        "Review the latest source diff and capture any interface changes.",
                        "Update the target markdown page with fresh examples, links, and API details.",
                        "Re-run drift and publish a preview before opening the docs PR.",
                    ],
                    source_revision=trace.latest_source_revision,
                    doc_revision=trace.latest_doc_revision,
                    status="open",
                    created_at=now,
                    updated_at=now,
                )
            )

        # --- Cascade detection: find cross-artifact ripple effects ---
        # Build a map of changed symbol names per artifact from drift results
        all_changed_symbols: dict[str, list[str]] = {}
        for artifact_key, dr in drift_results.items():
            if dr.is_stale and dr.diff and dr.diff.changes:
                all_changed_symbols[artifact_key] = dr.diff.changed_symbol_names

        if all_changed_symbols:
            # Build MintDocument stubs from artifact definitions for cascade text search.
            # Each stub needs:
            # - symbols: list with a sentinel entry so _artifact_key() returns the key
            # - narrative: the summary text to search for symbol mentions
            mint_stubs: list[MintDocument] = []
            for rec in definition_records.values():
                sentinel: dict[str, object] = {
                    "k": "__artifact__",
                    "n": rec.title,
                    "artifact_key": rec.artifact_key,
                    "artifact_type": rec.artifact_type,
                }
                stub = MintDocument(
                    source_files=list(rec.source_patterns),
                    symbols=[sentinel],
                    narrative=rec.summary,
                )
                mint_stubs.append(stub)

            cascade_finding_ids: set[str] = set()
            for source_key, symbol_names in all_changed_symbols.items():
                cascades = find_cascades(
                    changed_symbol_names=symbol_names,
                    source_artifact_key=source_key,
                    all_artifacts=mint_stubs,
                )
                for cascade in cascades:
                    # Only surface confident cascades; dedupe by composite id.
                    if cascade.confidence not in ("HIGH", "MEDIUM"):
                        continue
                    cascade_id = f"cascade-{project_id}-{source_key}-{cascade.affected_artifact_key}"
                    if cascade_id in cascade_finding_ids:
                        continue
                    cascade_finding_ids.add(cascade_id)
                    # Determine artifact type for the affected artifact
                    affected_def = definition_records.get(cascade.affected_artifact_key)
                    affected_type = (
                        ArtifactType(affected_def.artifact_type)
                        if affected_def
                        else ArtifactType.API_REFERENCE
                    )
                    findings.append(
                        DriftFinding(
                            id=cascade_id,
                            project_id=project_id,
                            artifact_id=cascade.affected_artifact_key,
                            artifact_type=affected_type,
                            severity=FindingSeverity.MEDIUM,
                            summary=(
                                f"Cascade: symbols from {cascade.source_artifact_key}"
                                f" referenced in {cascade.affected_artifact_key}"
                            ),
                            rationale=(
                                f"Changed symbols ({', '.join(cascade.affected_symbol_names)})"
                                f" from {cascade.source_artifact_key} appear in this artifact's"
                                " narrative or API schema."
                            ),
                            source_paths=[],
                            doc_paths=[],
                            suggested_actions=[
                                "Review the affected sections for stale references.",
                                "Update any code examples or descriptions that mention the changed symbols.",
                            ],
                            status="open",
                            created_at=now,
                            updated_at=now,
                            changed_symbols=[
                                {"name": s, "kind": "cascade"}
                                for s in cascade.affected_symbol_names
                            ],
                            confidence_score=0.8 if cascade.confidence == "HIGH" else 0.5,
                        )
                    )

        return findings
2503 def _filter_findings(
2504 self,
2505 findings: list[DriftFinding],
2506 changed_files: list[str],
2507 ) -> list[DriftFinding]:
2508 if not changed_files:
2509 return findings
2510 changed = {path.strip() for path in changed_files if path.strip()}
2511 filtered: list[DriftFinding] = []
2512 for finding in findings:
2513 matching_sources = [path for path in finding.source_paths if path in changed]
2514 matching_docs = [path for path in finding.doc_paths if path in changed]
2515 if not matching_sources and not matching_docs:
2516 continue
2517 filtered.append(
2518 finding.model_copy(
2519 update={
2520 "source_paths": matching_sources or finding.source_paths,
2521 "doc_paths": matching_docs or finding.doc_paths,
2522 "rationale": (
2523 f"{finding.rationale} Relevant changed files: "
2524 + ", ".join(matching_sources + matching_docs)
2525 )[:1000],
2526 }
2527 )
2528 )
2529 return filtered
    def _latest_run(self, session: Session, project_id: str) -> VerificationRun | None:
        """Return the most recent verification run with its signal and findings.

        Returns None when no run exists or when the run's source-signal row is
        missing (the model cannot be built without it).
        """
        record = session.scalar(
            select(VerificationRunRecord)
            .where(VerificationRunRecord.project_id == project_id)
            .order_by(desc(VerificationRunRecord.started_at))
        )
        if record is None:
            return None
        signal_record = session.get(SourceSignalRecord, record.signal_id)
        if signal_record is None:
            return None
        # Findings attached to this run, newest first.
        findings = [
            self._finding_model(item)
            for item in session.scalars(
                select(DriftFindingRecord)
                .where(DriftFindingRecord.verification_run_id == record.id)
                .order_by(desc(DriftFindingRecord.created_at))
            ).all()
        ]
        return VerificationRun(
            id=record.id,
            project_id=record.project_id,
            status=JobStatus(record.status),
            signal=SourceSignal(
                id=signal_record.id,
                type=SourceSignalType(signal_record.signal_type),
                ref=signal_record.ref,
                changed_files=list(signal_record.changed_files),
                created_at=signal_record.created_at,
            ),
            findings_count=record.findings_count,
            findings=findings,
            started_at=record.started_at,
            completed_at=record.completed_at,
        )
2567 def _latest_deployment(
2568 self, session: Session, project_id: str
2569 ) -> PublishDeployment | None:
2570 record = session.scalar(
2571 select(PublishDeploymentRecord)
2572 .where(PublishDeploymentRecord.project_id == project_id)
2573 .order_by(desc(PublishDeploymentRecord.generated_at))
2574 )
2575 return self._publish_model(record) if record is not None else None
2577 def _project_docs_root(self, session: Session, project_id: str) -> Path:
2578 settings_record = session.get(ProjectSettingsRecord, f"settings-{project_id}")
2579 docs_root = (
2580 settings_record.docs_root
2581 if settings_record is not None
2582 else str(settings.docs_content_path.relative_to(self.repo_root))
2583 )
2584 return (self.repo_root / docs_root).resolve()
    def _publish_pages_for_project(
        self,
        session: Session,
        workspace_slug: str,
        project_slug: str,
        project_id: str,
        deployment_id: str,
        docs_root: Path,
    ) -> list[PublishedPage]:
        """Re-materialize PublishedPageRecords from the markdown under *docs_root*.

        Deletes the project's previous page rows, inserts one row per ``*.md``
        file, and synthesizes an ``index`` landing page when none exists.
        Returns the page models in insertion order.
        """
        pages: list[PublishedPage] = []
        # Full rebuild: drop all previously published pages for this project.
        session.execute(
            delete(PublishedPageRecord).where(
                PublishedPageRecord.project_id == project_id
            )
        )
        for markdown_path in sorted(docs_root.rglob("*.md")):
            relative_source = str(markdown_path.relative_to(self.repo_root))
            raw = markdown_path.read_text(encoding="utf-8")
            metadata, body = _parse_frontmatter(raw)
            relative_doc = markdown_path.relative_to(docs_root)
            # Normalize Windows separators so page paths are URL-style.
            path_slug = str(relative_doc.with_suffix("")).replace("\\", "/")
            page_path = "index" if path_slug == "index" else path_slug
            # Frontmatter wins; otherwise derive a title from the filename.
            title = metadata.get("title") or relative_doc.stem.replace("-", " ").title()
            description = metadata.get("description") or ""
            record = PublishedPageRecord(
                id=f"page-{uuid4().hex[:12]}",
                project_id=project_id,
                deployment_id=deployment_id,
                workspace_slug=workspace_slug,
                project_slug=project_slug,
                path=page_path,
                title=title,
                description=description,
                content_markdown=body,
                # Concatenated text used for page search.
                search_body=f"{title}\n{description}\n{body}",
                source_path=relative_source,
            )
            session.add(record)
            session.flush()
            pages.append(self._published_page_model(record))
        if not any(page.path == "index" for page in pages):
            # No index.md in the tree: synthesize a landing page linking the rest.
            title = f"{project_slug.replace('-', ' ').title()} Docs"
            summary = (
                "Published documentation generated by Documint for the latest "
                "successful deployment."
            )
            links = "\n".join(
                f"- [{page.title}](./{page.path})"
                for page in pages
                if page.path != "index"
            )
            body = "\n".join(
                line
                for line in (
                    f"# {title}",
                    "",
                    summary,
                    "",
                    "## Pages",
                    "",
                    links or "- No published pages are available yet.",
                )
                if line is not None
            )
            record = PublishedPageRecord(
                id=f"page-{uuid4().hex[:12]}",
                project_id=project_id,
                deployment_id=deployment_id,
                workspace_slug=workspace_slug,
                project_slug=project_slug,
                path="index",
                title=title,
                description=summary,
                content_markdown=body,
                search_body=f"{title}\n{summary}\n{body}",
                source_path=str(docs_root.relative_to(self.repo_root)),
            )
            session.add(record)
            session.flush()
            pages.append(self._published_page_model(record))
        return pages
    def _store_patch(
        self,
        session: Session,
        project_id: str,
        artifact_id: str,
        finding_id: str | None,
        target_path: str,
        result: DraftPatchResult,
    ) -> Any:
        """Persist a drafted doc patch plus an AgentRunRecord audit row.

        Returns the new DocPatchRecord (flushed, so its id is usable).
        """
        patch_id = f"patch-{uuid4().hex[:12]}"
        patch_record = DocPatchRecord(
            id=patch_id,
            project_id=project_id,
            finding_id=finding_id,
            artifact_key=artifact_id,
            target_path=target_path,
            summary=result.summary,
            rationale=result.rationale,
            proposed_sections=result.proposed_sections,
            citations=[
                citation.model_dump(mode="json") for citation in result.citations
            ],
            preview_markdown=result.preview_markdown,
            ai_provider=result.ai_provider,
            model_name=result.model_name,
            chain_steps_used=result.chain_steps_used,
            confidence_score=result.confidence_score,
            # "blocked" when citations or preview are missing, else "reviewable".
            status=self._patch_status_for_result(result),
        )
        session.add(patch_record)
        # Audit trail: record the generation run alongside the patch.
        session.add(
            AgentRunRecord(
                id=f"agent-{uuid4().hex[:12]}",
                project_id=project_id,
                kind="patch_generation",
                provider=result.ai_provider,
                model=result.model_name,
                status="completed",
                input_summary=result.input_summary,
                output_summary=result.summary,
                metadata_json={"artifact_id": artifact_id, "finding_id": finding_id},
                created_at=_now(),
                completed_at=_now(),
            )
        )
        session.flush()
        return patch_record
2716 def _patch_status_for_result(self, result: DraftPatchResult) -> str:
2717 if not result.citations or not result.preview_markdown.strip():
2718 return "blocked"
2719 return "reviewable"
2721 def _patch_review_error(self, patch: DocPatchRecord) -> str | None:
2722 if not patch.citations:
2723 return "Patch draft is blocked because it has no source citations."
2724 if not patch.preview_markdown.strip():
2725 return "Patch draft is blocked because it has no markdown preview."
2726 return None
2728 def _record_activity(
2729 self,
2730 session: Session,
2731 workspace_id: str,
2732 project_id: str | None,
2733 kind: str,
2734 title: str,
2735 body: str,
2736 metadata_json: dict[str, object],
2737 ) -> None:
2738 session.add(
2739 ActivityEventRecord(
2740 id=f"evt-{uuid4().hex[:12]}",
2741 workspace_id=workspace_id,
2742 project_id=project_id,
2743 kind=kind,
2744 title=title,
2745 body=body,
2746 metadata_json=metadata_json,
2747 )
2748 )
2750 def _workspace_records_for_user(
2751 self, session: Session, user_id: str | None
2752 ) -> list[WorkspaceRecord]:
2753 if user_id is None:
2754 return list(
2755 session.scalars(
2756 select(WorkspaceRecord).order_by(WorkspaceRecord.created_at.asc())
2757 ).all()
2758 )
2759 return list(
2760 session.scalars(
2761 select(WorkspaceRecord)
2762 .join(
2763 WorkspaceMemberRecord,
2764 WorkspaceMemberRecord.workspace_id == WorkspaceRecord.id,
2765 )
2766 .where(WorkspaceMemberRecord.user_id == user_id)
2767 .order_by(WorkspaceRecord.created_at.asc())
2768 ).all()
2769 )
2771 def _require_workspace_access(
2772 self, session: Session, workspace_id: str, user_id: str | None
2773 ) -> None:
2774 if user_id is None:
2775 return
2776 membership = session.scalar(
2777 select(WorkspaceMemberRecord).where(
2778 WorkspaceMemberRecord.workspace_id == workspace_id,
2779 WorkspaceMemberRecord.user_id == user_id,
2780 )
2781 )
2782 if membership is None:
2783 raise PermissionError(
2784 f"User {user_id} does not belong to workspace {workspace_id}"
2785 )
2787 def _require_project_access(
2788 self, session: Session, project_id: str, user_id: str | None
2789 ) -> None:
2790 if user_id is None:
2791 return
2792 project = session.get(ProjectRecord, project_id)
2793 if project is None:
2794 raise KeyError(f"Unknown project id: {project_id}")
2795 self._require_workspace_access(session, project.workspace_id, user_id)
    def _assign_first_operator_workspace(self, session: Session, user_id: str) -> None:
        """Grant the first Clerk-authenticated user ownership of the default workspace.

        No-ops when the user already holds a membership, when any clerk-provider
        user already holds one (someone claimed it first), or when the default
        workspace does not exist.
        """
        existing_membership = session.scalar(
            select(WorkspaceMemberRecord).where(
                WorkspaceMemberRecord.user_id == user_id
            )
        )
        if existing_membership is not None:
            return
        # Only the very first clerk user gets auto-assigned.
        existing_clerk_memberships = session.scalar(
            select(WorkspaceMemberRecord)
            .join(UserRecord, UserRecord.id == WorkspaceMemberRecord.user_id)
            .where(UserRecord.provider == "clerk")
        )
        if existing_clerk_memberships is not None:
            return
        workspace = session.get(WorkspaceRecord, settings.default_workspace_id)
        if workspace is None:
            return
        session.add(
            WorkspaceMemberRecord(
                id=f"wm-{uuid4().hex[:12]}",
                workspace_id=workspace.id,
                user_id=user_id,
                role="owner",
            )
        )
    def _run_frontend_revalidation_job(
        self, project_id: str, deployment_id: str | None = None
    ) -> QueuedJob | None:
        """POST the internal revalidation hook and track it as a background job.

        Returns None when revalidation is not configured; otherwise the job in
        its terminal state (completed or failed). On HTTP failure an activity
        event is also recorded against the project's workspace.
        """
        if (
            not settings.frontend_revalidate_url
            or not settings.frontend_revalidate_secret
        ):
            # Feature disabled: both the URL and the shared secret are required.
            return None
        job = self.create_job(
            job_kind="frontend_revalidate",
            project_id=project_id,
            payload_json={"deployment_id": deployment_id},
            user_id=None,
        )
        self.mark_job_running(job.job_id)
        try:
            with httpx.Client(timeout=10.0) as client:
                response = client.post(
                    settings.frontend_revalidate_url,
                    headers={
                        "X-Documint-Internal": settings.frontend_revalidate_secret
                    },
                    json={"project_id": project_id, "deployment_id": deployment_id},
                )
                response.raise_for_status()
        except httpx.HTTPError as exc:
            failed_job = self.mark_job_failed(
                job.job_id,
                error_summary=f"Frontend revalidation failed: {exc}",
                result_json={"deployment_id": deployment_id},
            )
            # Surface the failure in the workspace activity feed.
            with session_scope() as session:
                project = session.get(ProjectRecord, project_id)
                if project is not None:
                    self._record_activity(
                        session,
                        project.workspace_id,
                        project_id,
                        "publish.revalidation.failed",
                        f"Frontend revalidation failed for {project.name}",
                        failed_job.error_summary or "Frontend revalidation failed.",
                        {"job_id": failed_job.job_id, "deployment_id": deployment_id},
                    )
            return failed_job
        return self.mark_job_completed(
            job.job_id,
            resource_type="publish_deployment",
            resource_id=deployment_id,
            result_summary="Frontend cache refreshed.",
            result_json={"deployment_id": deployment_id},
        )
2876 def _public_docs_base_url(self, workspace_slug: str, project_slug: str) -> str:
2877 return f"{settings.public_base_url.rstrip('/')}/p/{workspace_slug}/{project_slug}/docs"
2879 def _workspace_slug(self, session: Session, workspace_id: str) -> str:
2880 workspace = session.get(WorkspaceRecord, workspace_id)
2881 if workspace is None:
2882 return settings.default_workspace_slug
2883 return workspace.slug
2885 def _read_doc_path(self, relative_path: str) -> str:
2886 absolute_path = self.repo_root / relative_path
2887 if not absolute_path.exists():
2888 return ""
2889 return absolute_path.read_text(encoding="utf-8")
2891 def _read_source_content(self, source_paths: list[str], max_chars: int = 4000) -> str:
2892 """Read source files and return concatenated content for AI context, capped at max_chars."""
2893 parts: list[str] = []
2894 remaining = max_chars
2895 for path in source_paths[:5]:
2896 absolute_path = self.repo_root / path
2897 if not absolute_path.exists():
2898 continue
2899 try:
2900 text = absolute_path.read_text(encoding="utf-8", errors="replace")
2901 except OSError:
2902 continue
2903 snippet = text[:remaining]
2904 parts.append(f"### {path}\n```\n{snippet}\n```")
2905 remaining -= len(snippet)
2906 if remaining <= 0:
2907 break
2908 return "\n\n".join(parts)
2910 def _resolve_patterns(self, patterns: Iterable[str]) -> list[str]:
2911 matches: set[str] = set()
2912 for pattern in patterns:
2913 absolute_pattern = self.repo_root / pattern
2914 for match in glob.glob(str(absolute_pattern), recursive=True):
2915 path = Path(match)
2916 relative_path = path.relative_to(self.repo_root)
2917 if path.is_file() and self._should_track_relative_path(relative_path):
2918 matches.add(str(relative_path))
2919 exact = self.repo_root / pattern
2920 if exact.is_file() and self._should_track_relative_path(
2921 exact.relative_to(self.repo_root)
2922 ):
2923 matches.add(str(exact.relative_to(self.repo_root)))
2924 return sorted(matches)
    def _latest_revision(self, paths: Iterable[str]) -> RepositoryRevision | None:
        """Return the most recent revision touching *paths*, or None.

        Dirty (uncommitted) files are represented by a synthetic ``WORKTREE``
        revision built from file mtimes; clean files use ``git log`` when it
        answers, otherwise fall back to the HEAD ref (or ``FILESYSTEM``) with
        the newest mtime. The newest candidate wins.
        """
        # Only consider paths that exist and pass the tracking policy.
        normalized_paths = [
            path
            for path in paths
            if (self.repo_root / path).exists()
            and self._should_track_relative_path(Path(path))
        ]
        if not normalized_paths:
            return None
        candidates: list[RepositoryRevision] = []
        dirty_paths = [path for path in normalized_paths if path in self._dirty_paths]
        clean_paths = [
            path for path in normalized_paths if path not in self._dirty_paths
        ]
        if dirty_paths:
            # Newest uncommitted change, by file modification time.
            latest_dirty = max(
                dirty_paths,
                key=lambda path: (self.repo_root / path).stat().st_mtime,
            )
            candidates.append(
                RepositoryRevision(
                    ref="WORKTREE",
                    touched_at=_utc_from_timestamp(
                        (self.repo_root / latest_dirty).stat().st_mtime
                    ),
                    committed=False,
                )
            )
        if clean_paths:
            # "<hash>|<unix-time>" of the last commit touching any clean path.
            result = self._run_git("log", "-1", "--format=%H|%ct", "--", *clean_paths)
            if result:
                commit_ref, _, timestamp = result.partition("|")
                if commit_ref and timestamp:
                    candidates.append(
                        RepositoryRevision(
                            ref=commit_ref,
                            touched_at=_utc_from_timestamp(int(timestamp)),
                            committed=True,
                        )
                    )
            else:
                # No git answer (no repo, or no commits for these paths):
                # approximate with the newest mtime; label with the HEAD ref
                # when it came from git/env, otherwise FILESYSTEM.
                latest_clean = max(
                    clean_paths,
                    key=lambda path: (self.repo_root / path).stat().st_mtime,
                )
                candidates.append(
                    RepositoryRevision(
                        ref=(
                            self._head_ref
                            if self._head_ref_source in {"git", "env"}
                            else "FILESYSTEM"
                        ),
                        touched_at=_utc_from_timestamp(
                            (self.repo_root / latest_clean).stat().st_mtime
                        ),
                        committed=self._head_ref_source in {"git", "env"},
                    )
                )
        if not candidates:
            return None
        return max(candidates, key=lambda item: item.touched_at)
2988 def _worktree_changed_files(self) -> list[str]:
2989 if self._dirty_paths:
2990 return sorted(self._dirty_paths)
2991 changed_files = self._read_worktree_changed_files()
2992 self._dirty_paths = set(changed_files)
2993 return changed_files
2995 def _refresh_repo_state(self) -> None:
2996 self._dirty_paths = set(self._read_worktree_changed_files())
2997 head = self._run_git("rev-parse", "--short", "HEAD")
2998 if head:
2999 clean_ref = head
3000 self._head_ref_source = "git"
3001 elif settings.deploy_commit_ref:
3002 clean_ref = settings.deploy_commit_ref[:12]
3003 self._head_ref_source = "env"
3004 else:
3005 clean_ref = "HEAD"
3006 self._head_ref_source = "unknown"
3007 if self._dirty_paths:
3008 self._head_ref = (
3009 f"{clean_ref}+WORKTREE" if clean_ref != "HEAD" else "WORKTREE"
3010 )
3011 else:
3012 self._head_ref = clean_ref
3014 def _read_worktree_changed_files(self) -> list[str]:
3015 result = self._run_git("status", "--porcelain")
3016 changed_files: list[str] = []
3017 for line in result.splitlines():
3018 if not line.strip():
3019 continue
3020 candidate = line[3:].strip()
3021 if " -> " in candidate:
3022 candidate = candidate.split(" -> ", 1)[1]
3023 relative_path = Path(candidate)
3024 if self._should_track_relative_path(relative_path):
3025 changed_files.append(candidate)
3026 return changed_files
3028 def _should_track_relative_path(self, relative_path: Path) -> bool:
3029 return not (
3030 any(part in IGNORED_PATH_PARTS for part in relative_path.parts)
3031 or relative_path.suffix in IGNORED_SUFFIXES
3032 )
3034 def _run_git(self, *args: str) -> str:
3035 if self._git_executable is None:
3036 return ""
3037 completed = subprocess.run( # noqa: S603 - internal git invocation only.
3038 [self._git_executable, "-C", str(self.repo_root), *args],
3039 check=False,
3040 capture_output=True,
3041 text=True,
3042 )
3043 if completed.returncode != 0:
3044 return ""
3045 return completed.stdout.strip()
3047 def _user_model(self, record: UserRecord) -> User:
3048 return User(
3049 id=record.id,
3050 external_id=record.external_id,
3051 email=record.email,
3052 name=record.name,
3053 provider=record.provider,
3054 created_at=record.created_at,
3055 )
3057 def _workspace_model(self, record: WorkspaceRecord) -> Workspace:
3058 return Workspace(
3059 id=record.id,
3060 slug=record.slug,
3061 name=record.name,
3062 description=record.description,
3063 created_at=record.created_at,
3064 )
3066 def _source_model(self, record: RepositorySourceRecord) -> RepositorySource:
3067 return RepositorySource(
3068 id=record.id,
3069 project_id=record.project_id,
3070 provider=record.provider,
3071 owner=record.owner,
3072 repo=record.repo,
3073 default_branch=record.default_branch,
3074 local_path=record.local_path,
3075 current_ref=record.current_ref,
3076 docs_root=record.docs_root,
3077 installation_id=record.installation_id,
3078 )
3080 def _installation_model(
3081 self, record: GitHubInstallationRecord
3082 ) -> GitHubInstallation:
3083 return GitHubInstallation(
3084 id=record.id,
3085 workspace_id=record.workspace_id,
3086 installation_id=record.installation_id,
3087 account_login=record.account_login,
3088 account_type=record.account_type,
3089 repository_selection=record.repository_selection,
3090 metadata_json=record.metadata_json,
3091 created_at=record.created_at,
3092 updated_at=record.updated_at,
3093 )
3095 def _github_repository_model(
3096 self, record: GitHubRepositoryRecord
3097 ) -> GitHubRepository:
3098 return GitHubRepository(
3099 id=record.id,
3100 github_installation_id=record.github_installation_id,
3101 repository_id=record.repository_id,
3102 full_name=record.full_name,
3103 owner=record.owner,
3104 name=record.name,
3105 default_branch=record.default_branch,
3106 visibility=record.visibility,
3107 is_private=record.is_private,
3108 is_archived=record.is_archived,
3109 metadata_json=record.metadata_json,
3110 created_at=record.created_at,
3111 updated_at=record.updated_at,
3112 )
3114 def _project_settings_model(
3115 self, record: ProjectSettingsRecord | None
3116 ) -> ProjectSettings | None:
3117 if record is None:
3118 return None
3119 return ProjectSettings(
3120 id=record.id,
3121 project_id=record.project_id,
3122 docs_root=record.docs_root,
3123 config_version=record.config_version,
3124 config_json=record.config_json,
3125 ai_policy=record.ai_policy,
3126 publish_behavior=record.publish_behavior,
3127 pr_behavior=record.pr_behavior,
3128 updated_at=record.updated_at,
3129 )
3131 def _project_model(self, session: Session, record: ProjectRecord) -> Project:
3132 source = session.get(RepositorySourceRecord, f"source-{record.id}")
3133 if source is None:
3134 raise KeyError(f"Missing source for project {record.id}")
3135 definitions = session.scalars(
3136 select(ArtifactDefinitionRecord)
3137 .where(ArtifactDefinitionRecord.project_id == record.id)
3138 .order_by(ArtifactDefinitionRecord.slug.asc())
3139 ).all()
3140 return Project(
3141 id=record.id,
3142 workspace_id=record.workspace_id,
3143 github_installation_id=record.github_installation_id,
3144 name=record.name,
3145 slug=record.slug,
3146 description=record.description,
3147 public_url=record.public_url,
3148 dashboard_url=record.dashboard_url,
3149 onboarding_status=record.onboarding_status,
3150 source=self._source_model(source),
3151 artifact_types=[ArtifactType(item.artifact_type) for item in definitions],
3152 )
3154 def _trace_model(
3155 self,
3156 record: ArtifactTraceRecord,
3157 definition: ArtifactDefinitionRecord,
3158 ) -> ArtifactTrace:
3159 return ArtifactTrace(
3160 id=definition.artifact_key,
3161 slug=definition.slug,
3162 title=definition.title,
3163 artifact_type=ArtifactType(definition.artifact_type),
3164 summary=definition.summary,
3165 doc_paths=list(record.doc_paths),
3166 source_paths=list(record.source_paths),
3167 latest_source_revision=_revision_from_payload(
3168 record.latest_source_revision
3169 ),
3170 latest_doc_revision=_revision_from_payload(record.latest_doc_revision),
3171 verification_status=VerificationStatus(record.verification_status),
3172 )
3174 def _finding_model(self, record: DriftFindingRecord) -> DriftFinding:
3175 return DriftFinding(
3176 id=record.id,
3177 project_id=record.project_id,
3178 verification_run_id=record.verification_run_id,
3179 artifact_id=record.artifact_key,
3180 artifact_type=ArtifactType(record.artifact_type),
3181 severity=FindingSeverity(record.severity),
3182 summary=record.summary,
3183 rationale=record.rationale,
3184 source_paths=list(record.source_paths),
3185 doc_paths=list(record.doc_paths),
3186 suggested_actions=list(record.suggested_actions),
3187 source_revision=_revision_from_payload(record.source_revision),
3188 doc_revision=_revision_from_payload(record.doc_revision),
3189 status=record.status,
3190 created_at=record.created_at,
3191 updated_at=record.updated_at,
3192 changed_symbols=list(record.changed_symbols or []),
3193 )
3195 def _patch_model(self, record: Any) -> DocPatch:
3196 return DocPatch(
3197 id=record.id,
3198 project_id=record.project_id,
3199 finding_id=record.finding_id,
3200 artifact_id=record.artifact_key,
3201 target_path=record.target_path,
3202 summary=record.summary,
3203 rationale=record.rationale,
3204 proposed_sections=list(record.proposed_sections),
3205 citations=[
3206 PatchCitation(**citation) for citation in (record.citations or [])
3207 ],
3208 preview_markdown=record.preview_markdown,
3209 ai_provider=record.ai_provider,
3210 model_name=record.model_name,
3211 chain_steps_used=record.chain_steps_used,
3212 confidence_score=record.confidence_score,
3213 status=record.status,
3214 created_at=record.created_at,
3215 updated_at=record.updated_at,
3216 )
3218 def _pull_request_model(self, record: PullRequestRecord) -> PullRequestModel:
3219 return PullRequestModel(
3220 id=record.id,
3221 project_id=record.project_id,
3222 patch_id=record.patch_id,
3223 branch_name=record.branch_name,
3224 title=record.title,
3225 url=record.url,
3226 state=record.state,
3227 created_at=record.created_at,
3228 updated_at=record.updated_at,
3229 )
    def _open_pull_request_on_github(
        self,
        *,
        session: Session,
        project: ProjectRecord,
        source: RepositorySourceRecord,
        patch: DocPatchRecord,
        branch_name: str,
        title: str,
    ) -> GitHubPullRequestResult:
        """Open (or update) a PR for *patch* via the GitHub App when configured.

        Falls back to returning a GitHub "compare" URL as a synthetic open PR
        when no usable installation id or app private key is available.
        """
        # Resolve the workspace's GitHub App installation record, if linked.
        installation_record = (
            session.get(GitHubInstallationRecord, project.github_installation_id)
            if project.github_installation_id
            else None
        )
        external_installation_id = (
            installation_record.installation_id
            if installation_record is not None
            else None
        )
        # Self-hosting bootstrap: Documint's own project may use a
        # settings-provided installation id when none is linked.
        if (
            external_installation_id is None
            and project.id == settings.project_id
            and settings.self_bootstrap_installation_id
        ):
            external_installation_id = settings.self_bootstrap_installation_id
        if external_installation_id and settings.github_app_private_key:
            body = (
                "This PR was opened by Documint.\n\n"
                f"- Artifact: `{patch.artifact_key}`\n"
                f"- Target path: `{patch.target_path}`\n"
                f"- Patch id: `{patch.id}`\n"
            )
            return create_or_update_pull_request(
                installation_id=external_installation_id,
                owner=source.owner,
                repo=source.repo,
                base_branch=source.default_branch,
                branch_name=branch_name,
                target_path=patch.target_path,
                content_markdown=patch.preview_markdown,
                title=title,
                body=body,
                commit_message=f"docs: update {patch.artifact_key} via Documint",
            )
        # No app credentials: hand back a pre-filled compare URL instead.
        return GitHubPullRequestResult(
            branch_name=branch_name,
            title=title,
            url=(
                f"https://github.com/{source.owner}/{source.repo}/compare/"
                f"{source.default_branch}...{branch_name}?expand=1"
            ),
            state="open",
        )
3286 def _publish_model(self, record: PublishDeploymentRecord) -> PublishDeployment:
3287 return PublishDeployment(
3288 id=record.id,
3289 project_id=record.project_id,
3290 status=JobStatus(record.status),
3291 commit_ref=record.commit_ref,
3292 site_url=record.site_url,
3293 preview_url=record.preview_url,
3294 docs_count=record.docs_count,
3295 generated_at=record.generated_at,
3296 )
3298 def _published_page_model(self, record: PublishedPageRecord) -> PublishedPage:
3299 return PublishedPage(
3300 id=record.id,
3301 project_id=record.project_id,
3302 deployment_id=record.deployment_id,
3303 workspace_slug=record.workspace_slug,
3304 project_slug=record.project_slug,
3305 path=record.path,
3306 title=record.title,
3307 description=record.description,
3308 content_markdown=record.content_markdown,
3309 search_body=record.search_body,
3310 source_path=record.source_path,
3311 created_at=record.created_at,
3312 updated_at=record.updated_at,
3313 )
3315 def _activity_model(self, record: ActivityEventRecord) -> ActivityEvent:
3316 return ActivityEvent(
3317 id=record.id,
3318 workspace_id=record.workspace_id,
3319 project_id=record.project_id,
3320 kind=record.kind,
3321 title=record.title,
3322 body=record.body,
3323 metadata_json=record.metadata_json,
3324 created_at=record.created_at,
3325 )
3327 def _job_model(self, record: BackgroundJobRecord) -> QueuedJob:
3328 result_json: dict[str, object] = record.result_json or {}
3329 return QueuedJob(
3330 job_id=record.id,
3331 job_kind=record.job_kind,
3332 project_id=record.project_id,
3333 workspace_id=record.workspace_id,
3334 status=JobStatus(record.status),
3335 attempt_count=record.attempt_count,
3336 error_summary=record.error_summary,
3337 resource_type=record.resource_type,
3338 resource_id=record.resource_id,
3339 result_summary=record.result_summary,
3340 site_url=str(result_json["site_url"]) if "site_url" in result_json else None,
3341 created_at=record.created_at,
3342 started_at=record.started_at,
3343 completed_at=record.completed_at,
3344 updated_at=record.updated_at,
3345 )
3347 def _api_token_model(
3348 self,
3349 record: ApiTokenRecord,
3350 raw_token: str | None = None,
3351 ) -> ApiTokenSummary:
3352 return ApiTokenSummary(
3353 id=record.id,
3354 workspace_id=record.workspace_id,
3355 user_id=record.user_id,
3356 label=record.label,
3357 token_prefix=record.token_prefix,
3358 scopes=list(record.scopes),
3359 created_at=record.created_at,
3360 last_used_at=record.last_used_at,
3361 revoked_at=record.revoked_at,
3362 token=raw_token,
3363 )
# Process-wide cached service singleton; rebuilt by get_service() when the key changes.
_service: DocumintService | None = None
# The (repo_root, database_url) pair the cached service was built against.
_service_key: tuple[str, str] | None = None
def get_service() -> DocumintService:
    """Return the process-wide DocumintService, rebuilding it if settings changed."""
    global _service, _service_key
    current_key = (str(settings.repo_root), settings.database_url)
    if _service_key != current_key or _service is None:
        _service = DocumintService(settings.repo_root)
        _service_key = current_key
    return _service
def reset_service() -> None:
    """Drop the cached service singleton and reset database state."""
    global _service, _service_key
    _service_key = None
    _service = None
    reset_db()