Coverage for privacyforms_pdf / cli.py: 100%
141 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-07 14:38 +0100
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-07 14:38 +0100
1"""Command-line interface for privacyforms-pdf."""
3from __future__ import annotations
5import json
6from importlib.metadata import version as get_version
7from pathlib import Path
9import click
11from .extractor import (
12 FormValidationError,
13 PDFFormError,
14 PDFFormExtractor,
15 PDFFormNotFoundError,
16)
def create_extractor(extract_geometry: bool = True) -> PDFFormExtractor:
    """Build the PDFFormExtractor instance used by the CLI commands.

    Args:
        extract_geometry: Passed through unchanged as the extractor's
            ``extract_geometry`` keyword argument.

    Returns:
        A newly constructed PDFFormExtractor.
    """
    # The extractor is constructed directly: there is no dependency probing
    # and no error handling at this layer.
    extractor = PDFFormExtractor(extract_geometry=extract_geometry)
    return extractor
@click.group()
@click.version_option(version=get_version("privacyforms-pdf"), prog_name="pdf-forms")
@click.pass_context
def main(ctx: click.Context) -> None:
    """PDF Form extraction and manipulation tools using pypdf.

    This CLI provides commands to extract, list, and fill PDF forms.
    Uses pypdf library for all operations.
    """
    # Root command group: the subcommands in this module register themselves
    # on it through @main.command().
    # Make sure ctx.obj holds a dict before any subcommand runs, so commands
    # receiving the context via @click.pass_context find a dict there.
    ctx.ensure_object(dict)
@main.command()
@click.pass_context
def check(ctx: click.Context) -> None:  # noqa: ARG001
    """Check if the CLI is properly installed."""
    # Reaching this point means the entry point resolved and the module
    # imported; the command only prints two fixed status lines.
    status_lines = (
        "✓ pdf-forms CLI is ready",
        "✓ Using pypdf for PDF form operations",
    )
    for status in status_lines:
        click.echo(status)
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output JSON file path (optional, prints to stdout if not provided)",
)
@click.option(
    "--raw/--unified",
    default=False,
    help="Output raw pypdf data (default: unified PDFField format)",
)
@click.pass_context
def extract(  # noqa: ARG001
    ctx: click.Context,
    pdf_path: Path,
    output: Path | None,
    raw: bool,
) -> None:
    """Extract form data from a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Examples:
        pdf-forms extract form.pdf
        pdf-forms extract form.pdf -o data.json
        pdf-forms extract form.pdf --raw -o raw.json
    """
    extractor = create_extractor()

    try:
        # Every output mode starts from the same extraction result.
        form_data = extractor.extract(pdf_path)

        if output:
            # File output: raw mode dumps ``raw_data``, unified mode dumps
            # ``to_dict()``. The payload is resolved after the file is opened.
            with open(output, "w", encoding="utf-8") as handle:
                payload = form_data.raw_data if raw else form_data.to_dict()
                json.dump(payload, handle, indent=2)
            confirmation = (
                f"Raw form data extracted to: {output}"
                if raw
                else f"Unified form data extracted to: {output}"
            )
            click.echo(confirmation)
        elif raw:
            # Raw data to stdout.
            click.echo(json.dumps(form_data.raw_data, indent=2))
        else:
            # Unified data to stdout, serialized by the form data object itself.
            click.echo(form_data.to_json())
    except PDFFormNotFoundError as e:
        # The "not found" message is passed on to the user as-is.
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        # Any other extractor error is reported with a generic message.
        raise click.ClickException("Failed to extract form.") from e
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--geometry/--no-geometry",
    default=True,
    help="Show geometry information (default: true)",
)
@click.pass_context
def list_fields(ctx: click.Context, pdf_path: Path, geometry: bool) -> None:  # noqa: ARG001
    """List all form fields in a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms list-fields form.pdf
        pdf-forms list-fields form.pdf --no-geometry
    """
    # Prints one table row per field (type, name, value and, with --geometry,
    # page/position/size), followed by a total count. Extractor errors are
    # converted to click.ClickException.
    extractor = create_extractor(extract_geometry=geometry)

    try:
        fields = extractor.list_fields(pdf_path)

        if not fields:
            click.echo("No form fields found.")
            return

        # Column widths: longest entry plus two characters of padding.
        # ``fields`` is non-empty here, so max() always has an argument.
        type_width = max(len(f.field_type) for f in fields) + 2
        name_width = max(len(f.name) for f in fields) + 2

        # Print header
        header = f"{'Type':<{type_width}} {'Name':<{name_width}} Value"
        if geometry:
            header += " Page Position (x, y) Size (w×h)"
        click.echo(header)
        # The rule under the header is exactly as wide as the header itself.
        click.echo("=" * len(header))

        # Print fields
        for field in fields:
            value_str = str(field.value)
            # Cap displayed values at 50 characters (47 + "...").
            if len(value_str) > 50:
                value_str = value_str[:47] + "..."
            line = f"{field.field_type:<{type_width}} {field.name:<{name_width}} {value_str}"

            if geometry and field.geometry:
                geom = field.geometry
                pos = f"({geom.x:.1f}, {geom.y:.1f})"
                size = f"{geom.width:.1f}×{geom.height:.1f}"
                line += f" {geom.page:>3} {pos:<18} {size}"
            elif geometry:
                # Geometry was requested but this field has none.
                line += " N/A"

            click.echo(line)

        click.echo(f"\nTotal fields: {len(fields)}")

    except PDFFormNotFoundError as e:
        # The "not found" message is passed on to the user as-is.
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        # Any other extractor error is reported with a generic message.
        raise click.ClickException("Failed to list fields.") from e
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("field_name")
@click.pass_context
def get_value(ctx: click.Context, pdf_path: Path, field_name: str) -> None:  # noqa: ARG001
    """Get the value of a specific form field.

    PDF_PATH is the path to the PDF file to process.
    FIELD_NAME is the name of the field to retrieve.

    Example:
        pdf-forms get-value form.pdf "Candidate Name"
    """
    extractor = create_extractor()

    try:
        value = extractor.get_field_value(pdf_path, field_name)
    except PDFFormNotFoundError as e:
        # The "not found" message is passed on to the user as-is.
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        # Any other extractor error is reported with a generic message.
        raise click.ClickException("Failed to get field value.") from e
    else:
        # A None result is reported as a missing field.
        if value is None:
            raise click.ClickException(f"Field '{field_name}' not found")

        click.echo(value)
@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def info(ctx: click.Context, pdf_path: Path) -> None:  # noqa: ARG001
    """Check if a PDF contains a form.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms info form.pdf
    """
    extractor = create_extractor()

    try:
        has_form = extractor.has_form(pdf_path)
    except PDFFormError as e:
        # Any extractor error is reported with a generic message.
        raise click.ClickException("Failed to get form info.") from e
    else:
        # One status line either way; the command only raises on extractor errors.
        report = (
            f"✓ {pdf_path} contains a form"
            if has_form
            else f"✗ {pdf_path} does not contain a form"
        )
        click.echo(report)
@main.command("fill-form")
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("json_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output PDF file path (modifies input if not provided)",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate JSON data against form fields before filling (default: validate)",
)
@click.option(
    "--strict/--no-strict",
    default=False,
    help="Require all form fields to be provided (default: not strict)",
)
@click.pass_context
def fill_form(
    ctx: click.Context,  # noqa: ARG001
    pdf_path: Path,
    json_path: Path,
    output: Path | None,
    validate: bool,
    strict: bool,
) -> None:
    """Fill a PDF form with data from a JSON file.

    PDF_PATH is the path to the PDF form file.
    JSON_PATH is the path to the JSON file with form data.

    The JSON file must contain simple key:value pairs where keys are field names
    and values are the values to fill:

        {"Candidate Name": "John Smith", "Full time": true}

    Examples:
        pdf-forms fill-form form.pdf data.json -o filled.pdf
        pdf-forms fill-form form.pdf data.json -o filled.pdf --strict
        pdf-forms fill-form form.pdf data.json -o filled.pdf --no-validate
    """
    # Flow: load JSON -> check its shape -> optionally validate -> fill.
    # All failures surface as click.ClickException.
    extractor = create_extractor()

    try:
        # Read and parse JSON
        with open(json_path, encoding="utf-8") as f:
            form_data = json.load(f)

        # Valid JSON can still be a list or scalar at the top level. The
        # documented contract is an object of field name/value pairs, so
        # reject anything else here with a clear message.
        if not isinstance(form_data, dict):
            raise click.ClickException(
                "JSON file must contain an object of field name/value pairs"
            )

        # Validate if requested
        if validate:
            errors = extractor.validate_form_data(
                pdf_path, form_data, strict=strict, allow_extra_fields=False
            )
            if errors:
                # List each problem on stderr before aborting.
                click.echo("Validation errors:", err=True)
                for error in errors:
                    click.echo(f"  - {error}", err=True)
                raise click.ClickException("Form validation failed")

            click.echo("✓ Form data validation passed")

        # Fill the form. validate=False is always passed: validation either
        # already ran above or was disabled with --no-validate.
        extractor.fill_form(pdf_path, form_data, output, validate=False)

        if output:
            click.echo(f"✓ Form filled and saved to: {output}")
        else:
            click.echo(f"✓ Form filled: {pdf_path}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except FormValidationError as e:
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        # Any other extractor error is reported with a generic message.
        raise click.ClickException("Failed to fill form.") from e
    except json.JSONDecodeError as e:
        raise click.ClickException(f"Invalid JSON file: {e}") from e
# Invoke the CLI group when this module is executed directly as a script.
if __name__ == "__main__":  # pragma: no cover
    main()