Coverage for privacyforms_pdf / cli.py: 100%
121 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-06 21:04 +0100
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-06 21:04 +0100
1"""Command-line interface for privacyforms-pdf."""
3from __future__ import annotations
5import json
6import sys
7from pathlib import Path
9import click
11from .extractor import (
12 FormValidationError,
13 PDFCPUError,
14 PDFCPUExecutionError,
15 PDFFormExtractor,
16 PDFFormNotFoundError,
17)
def create_extractor() -> PDFFormExtractor:
    """Build the PDFFormExtractor shared by the CLI commands.

    Returns:
        A freshly constructed PDFFormExtractor.

    Raises:
        click.ClickException: If constructing the extractor raises
            PDFCPUError; the message is the text of that error.
    """
    try:
        extractor = PDFFormExtractor()
    except PDFCPUError as exc:
        # Convert the library error into a Click error, keeping the cause chained.
        raise click.ClickException(str(exc)) from exc
    return extractor
@click.group()
@click.version_option(version="0.1.0", prog_name="pdf-forms")
def main() -> None:
    """PDF Form extraction and manipulation tools using pdfcpu.

    This CLI provides commands to extract, list, and fill PDF forms.
    Requires pdfcpu to be installed on your system.

    Visit https://pdfcpu.io/install for installation instructions.
    """
    # Intentionally empty: the group only hosts the subcommands registered
    # on it via @main.command(); the docstring above is the group help text.
@main.command()
def check() -> None:
    """Check if pdfcpu is installed and working."""
    try:
        # Built directly (not via create_extractor) so a failure can be
        # reported with the install hint below.
        pdfcpu_version = PDFFormExtractor().get_pdfcpu_version()
    except PDFCPUError as exc:
        failure_lines = (
            f"✗ pdfcpu not found: {exc}",
            "Please install pdfcpu: https://pdfcpu.io/install",
        )
        for line in failure_lines:
            click.echo(line, err=True)
        sys.exit(1)
    else:
        click.echo(f"✓ pdfcpu is installed: {pdfcpu_version}")
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output JSON file path (optional, prints to stdout if not provided)",
)
def extract(pdf_path: Path, output: Path | None) -> None:
    """Extract form data from a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Examples:
        pdf-forms extract form.pdf
        pdf-forms extract form.pdf -o data.json
    """
    extractor = create_extractor()

    try:
        if not output:
            # No destination given: dump the raw form data to stdout as
            # indented JSON.
            extracted = extractor.extract(pdf_path)
            click.echo(json.dumps(extracted.raw_data, indent=2))
            return

        # Destination given: let the extractor write the JSON file itself.
        extractor.extract_to_json(pdf_path, output)
        click.echo(f"Form data extracted to: {output}")
    except PDFFormNotFoundError as exc:
        raise click.ClickException(str(exc)) from exc
    except PDFCPUExecutionError as exc:
        raise click.ClickException("Failed to extract form.") from exc
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
def list_fields(pdf_path: Path) -> None:
    """List all form fields in a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms list-fields form.pdf
    """
    extractor = create_extractor()

    try:
        fields = extractor.list_fields(pdf_path)

        if not fields:
            click.echo("No form fields found.")
            return

        # Column widths: longest entry plus two characters of padding.
        type_width = max(len(f.field_type) for f in fields) + 2
        name_width = max(len(f.name) for f in fields) + 2

        # Header row followed by a rule of exactly the same width, so the
        # underline lines up with the header instead of overshooting it.
        header = f"{'Type':<{type_width}} {'Name':<{name_width}} Value"
        click.echo(header)
        click.echo("=" * len(header))

        for field in fields:
            value_str = str(field.value)
            # Cap displayed values at 50 characters (47 + "...").
            if len(value_str) > 50:
                value_str = value_str[:47] + "..."
            click.echo(f"{field.field_type:<{type_width}} {field.name:<{name_width}} {value_str}")

        click.echo(f"\nTotal fields: {len(fields)}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except PDFCPUExecutionError as e:
        raise click.ClickException("Failed to list fields.") from e
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("field_name")
def get_value(pdf_path: Path, field_name: str) -> None:
    """Get the value of a specific form field.

    PDF_PATH is the path to the PDF file to process.
    FIELD_NAME is the name of the field to retrieve.

    Example:
        pdf-forms get-value form.pdf "Candidate Name"
    """
    extractor = create_extractor()

    try:
        field_value = extractor.get_field_value(pdf_path, field_name)
    except PDFFormNotFoundError as exc:
        raise click.ClickException(str(exc)) from exc
    except PDFCPUExecutionError as exc:
        raise click.ClickException("Failed to get field value.") from exc

    # A None result is reported as a missing field.
    if field_value is None:
        raise click.ClickException(f"Field '{field_name}' not found")

    click.echo(field_value)
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
def info(pdf_path: Path) -> None:
    """Check if a PDF contains a form.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms info form.pdf
    """
    extractor = create_extractor()

    try:
        form_present = extractor.has_form(pdf_path)
    except PDFCPUExecutionError as exc:
        raise click.ClickException("Failed to get form info.") from exc

    # Either outcome is a normal report on stdout, not an error.
    click.echo(
        f"✓ {pdf_path} contains a form"
        if form_present
        else f"✗ {pdf_path} does not contain a form"
    )
def _load_form_data(json_path: Path) -> dict[str, object]:
    """Read the JSON file at json_path and return its top-level object.

    Raises:
        click.ClickException: If the file cannot be read or decoded, is not
            valid JSON, or its top-level value is not a JSON object.
    """
    try:
        with open(json_path, encoding="utf-8") as f:
            form_data = json.load(f)
    except json.JSONDecodeError as e:
        raise click.ClickException(f"Invalid JSON file: {e}") from e
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
        raise click.ClickException(f"Cannot read JSON file: {e}") from e

    # The command documents the input as field-name/value pairs, so reject
    # lists and scalars here with a clear message.
    if not isinstance(form_data, dict):
        raise click.ClickException(
            "Invalid JSON file: expected a JSON object of field name/value pairs"
        )
    return form_data


@main.command("fill-form")
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("json_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output PDF file path (modifies input if not provided)",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate JSON data against form fields before filling (default: validate)",
)
@click.option(
    "--strict/--no-strict",
    default=False,
    help="Require all form fields to be provided (default: not strict)",
)
def fill_form(
    pdf_path: Path, json_path: Path, output: Path | None, validate: bool, strict: bool
) -> None:
    """Fill a PDF form with data from a JSON file.

    PDF_PATH is the path to the PDF form file.
    JSON_PATH is the path to the JSON file with form data.

    The JSON file must contain simple key:value pairs where keys are field names
    and values are the values to fill:

        {"Candidate Name": "John Smith", "Full time": true}

    Examples:
        pdf-forms fill-form form.pdf data.json -o filled.pdf
        pdf-forms fill-form form.pdf data.json -o filled.pdf --strict
        pdf-forms fill-form form.pdf data.json -o filled.pdf --no-validate
    """
    extractor = create_extractor()

    # Read and parse JSON (errors are reported as ClickException).
    form_data = _load_form_data(json_path)

    try:
        # Validate if requested
        if validate:
            errors = extractor.validate_form_data(
                pdf_path, form_data, strict=strict, allow_extra_fields=False
            )
            if errors:
                click.echo("Validation errors:", err=True)
                for error in errors:
                    click.echo(f"  - {error}", err=True)
                raise click.ClickException("Form validation failed")

            click.echo("✓ Form data validation passed")

        # Fill the form; validation is handled above, so it is disabled here.
        extractor.fill_form(pdf_path, form_data, output, validate=False)

        if output:
            click.echo(f"✓ Form filled and saved to: {output}")
        else:
            click.echo(f"✓ Form filled: {pdf_path}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except FormValidationError as e:
        raise click.ClickException(str(e)) from e
    except PDFCPUExecutionError as e:
        raise click.ClickException("Failed to fill form.") from e
    except json.JSONDecodeError as e:
        # Kept from the original handler set so a decode error raised during
        # validation/filling is still reported the same way as before.
        raise click.ClickException(f"Invalid JSON file: {e}") from e
# Run the Click group only when this module is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    main()