Coverage for privacyforms_pdf / cli.py: 100%

141 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-07 14:38 +0100

1"""Command-line interface for privacyforms-pdf.""" 

2 

3from __future__ import annotations 

4 

5import json 

6from importlib.metadata import version as get_version 

7from pathlib import Path 

8 

9import click 

10 

11from .extractor import ( 

12 FormValidationError, 

13 PDFFormError, 

14 PDFFormExtractor, 

15 PDFFormNotFoundError, 

16) 

17 

18 

def create_extractor(extract_geometry: bool = True) -> PDFFormExtractor:
    """Build the PDFFormExtractor instance used by the CLI commands.

    Args:
        extract_geometry: Forwarded to PDFFormExtractor as ``extract_geometry``;
            whether field geometry should be extracted.

    Returns:
        A newly constructed PDFFormExtractor.
    """
    # The extractor is constructed directly: per the original note, pypdf is
    # always available, so there is no external dependency to probe first.
    extractor = PDFFormExtractor(extract_geometry=extract_geometry)
    return extractor

30 

31 

@click.group()
# The version string is read from the installed "privacyforms-pdf" package
# metadata when this decorator expression is evaluated (at module import).
@click.version_option(version=get_version("privacyforms-pdf"), prog_name="pdf-forms")
@click.pass_context
def main(ctx: click.Context) -> None:
    """PDF Form extraction and manipulation tools using pypdf.

    This CLI provides commands to extract, list, and fill PDF forms.
    Uses pypdf library for all operations.
    """
    # Make sure ctx.obj is a dict so subcommands receiving the context have a
    # shared object available.
    ctx.ensure_object(dict)

43 

44 

@main.command()
@click.pass_context
def check(ctx: click.Context) -> None:  # noqa: ARG001
    """Check if the CLI is properly installed."""
    # Reaching this point means the entry point imported and dispatched
    # correctly; just report the status lines in order.
    status_lines = (
        "✓ pdf-forms CLI is ready",
        "✓ Using pypdf for PDF form operations",
    )
    for status in status_lines:
        click.echo(status)

51 

52 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output JSON file path (optional, prints to stdout if not provided)",
)
@click.option(
    "--raw/--unified",
    default=False,
    help="Output raw pypdf data (default: unified PDFField format)",
)
@click.pass_context
def extract(
    ctx: click.Context,  # noqa: ARG001
    pdf_path: Path,
    output: Path | None,
    raw: bool,
) -> None:
    """Extract form data from a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Examples:
        pdf-forms extract form.pdf
        pdf-forms extract form.pdf -o data.json
        pdf-forms extract form.pdf --raw -o raw.json
    """
    extractor = create_extractor()

    try:
        # Extraction is identical for every output mode, so do it once.
        form_data = extractor.extract(pdf_path)

        if output is None:
            # Stdout: raw data is serialized here, the unified format uses the
            # form data object's own to_json().
            if raw:
                click.echo(json.dumps(form_data.raw_data, indent=2))
            else:
                click.echo(form_data.to_json())
            return

        # Build the payload before opening the file so a failure while
        # producing it does not leave a truncated output file behind.
        payload = form_data.raw_data if raw else form_data.to_dict()
        try:
            with open(output, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
        except OSError as e:
            # Unwritable path (missing directory, permissions, ...): report it
            # as a normal CLI error instead of a traceback.
            raise click.ClickException(f"Could not write output file '{output}': {e}") from e

        label = "Raw" if raw else "Unified"
        click.echo(f"{label} form data extracted to: {output}")
    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        raise click.ClickException("Failed to extract form.") from e

105 

106 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--geometry/--no-geometry",
    default=True,
    help="Show geometry information (default: true)",
)
@click.pass_context
def list_fields(ctx: click.Context, pdf_path: Path, geometry: bool) -> None:  # noqa: ARG001
    """List all form fields in a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms list-fields form.pdf
        pdf-forms list-fields form.pdf --no-geometry
    """
    extractor = create_extractor(extract_geometry=geometry)

    try:
        fields = extractor.list_fields(pdf_path)

        if not fields:
            click.echo("No form fields found.")
            return

        # Column widths: widest entry plus two characters of padding.
        type_width = max(len(f.field_type) for f in fields) + 2
        name_width = max(len(f.name) for f in fields) + 2

        # Print header
        header = f"{'Type':<{type_width}} {'Name':<{name_width}} Value"
        if geometry:
            header += " Page Position (x, y) Size (w×h)"
        click.echo(header)
        # The rule is exactly as wide as the header it underlines (it was
        # previously emitted at twice that width).
        click.echo("=" * len(header))

        # Print fields
        for field in fields:
            value_str = str(field.value)
            # Keep long values to 50 characters, ellipsis included.
            if len(value_str) > 50:
                value_str = value_str[:47] + "..."
            line = f"{field.field_type:<{type_width}} {field.name:<{name_width}} {value_str}"

            if geometry and field.geometry:
                geom = field.geometry
                pos = f"({geom.x:.1f}, {geom.y:.1f})"
                size = f"{geom.width:.1f}×{geom.height:.1f}"
                line += f" {geom.page:>3} {pos:<18} {size}"
            elif geometry:
                # Geometry was requested but this field has none.
                line += " N/A"

            click.echo(line)

        click.echo(f"\nTotal fields: {len(fields)}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        raise click.ClickException("Failed to list fields.") from e

167 

168 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("field_name")
@click.pass_context
def get_value(ctx: click.Context, pdf_path: Path, field_name: str) -> None:  # noqa: ARG001
    """Get the value of a specific form field.

    PDF_PATH is the path to the PDF file to process.
    FIELD_NAME is the name of the field to retrieve.

    Example:
        pdf-forms get-value form.pdf "Candidate Name"
    """
    extractor = create_extractor()

    # Only the lookup itself can raise the extractor's errors, so only that
    # call is guarded.
    try:
        field_value = extractor.get_field_value(pdf_path, field_name)
    except PDFFormNotFoundError as not_found:
        raise click.ClickException(str(not_found)) from not_found
    except PDFFormError as form_error:
        raise click.ClickException("Failed to get field value.") from form_error

    # A None result is reported to the user as a missing field.
    if field_value is None:
        raise click.ClickException(f"Field '{field_name}' not found")

    click.echo(field_value)

196 

197 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def info(ctx: click.Context, pdf_path: Path) -> None:  # noqa: ARG001
    """Check if a PDF contains a form.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms info form.pdf
    """
    extractor = create_extractor()

    # Guard just the extractor call; the reporting below cannot raise
    # PDFFormError.
    try:
        form_present = extractor.has_form(pdf_path)
    except PDFFormError as form_error:
        raise click.ClickException("Failed to get form info.") from form_error

    if not form_present:
        click.echo(f"{pdf_path} does not contain a form")
        return

    click.echo(f"{pdf_path} contains a form")

219 

220 

@main.command("fill-form")
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("json_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output PDF file path (modifies input if not provided)",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate JSON data against form fields before filling (default: validate)",
)
@click.option(
    "--strict/--no-strict",
    default=False,
    help="Require all form fields to be provided (default: not strict)",
)
@click.pass_context
def fill_form(
    ctx: click.Context,  # noqa: ARG001
    pdf_path: Path,
    json_path: Path,
    output: Path | None,
    validate: bool,
    strict: bool,
) -> None:
    """Fill a PDF form with data from a JSON file.

    PDF_PATH is the path to the PDF form file.
    JSON_PATH is the path to the JSON file with form data.

    The JSON file must contain simple key:value pairs where keys are field names
    and values are the values to fill:

        {"Candidate Name": "John Smith", "Full time": true}

    Examples:
        pdf-forms fill-form form.pdf data.json -o filled.pdf
        pdf-forms fill-form form.pdf data.json -o filled.pdf --strict
        pdf-forms fill-form form.pdf data.json -o filled.pdf --no-validate
    """
    extractor = create_extractor()

    try:
        # Read and parse JSON
        with open(json_path, encoding="utf-8") as f:
            form_data = json.load(f)

        # Valid JSON can still be a list, string, number, ...; the form data
        # must be an object of field name -> value, so reject anything else
        # here with a clear message instead of passing it to the extractor.
        if not isinstance(form_data, dict):
            raise click.ClickException(
                "JSON file must contain an object mapping field names to values, "
                f"got {type(form_data).__name__}"
            )

        # Validate if requested
        if validate:
            errors = extractor.validate_form_data(
                pdf_path, form_data, strict=strict, allow_extra_fields=False
            )
            if errors:
                # List every problem on stderr before aborting.
                click.echo("Validation errors:", err=True)
                for error in errors:
                    click.echo(f" - {error}", err=True)
                raise click.ClickException("Form validation failed")

            click.echo("✓ Form data validation passed")

        # Fill the form; validation (if requested) already ran above, so it is
        # not repeated inside the extractor.
        extractor.fill_form(pdf_path, form_data, output, validate=False)

        if output:
            click.echo(f"✓ Form filled and saved to: {output}")
        else:
            click.echo(f"✓ Form filled: {pdf_path}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except FormValidationError as e:
        raise click.ClickException(str(e)) from e
    except PDFFormError as e:
        raise click.ClickException("Failed to fill form.") from e
    except json.JSONDecodeError as e:
        raise click.ClickException(f"Invalid JSON file: {e}") from e

298 except json.JSONDecodeError as e: 

299 raise click.ClickException(f"Invalid JSON file: {e}") from e 

300 

301 

# Run the CLI group when this module is executed directly as a script.
if __name__ == "__main__":  # pragma: no cover
    main()