Coverage for privacyforms_pdf / cli.py: 100%

121 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-06 21:14 +0100

1"""Command-line interface for privacyforms-pdf.""" 

2 

3from __future__ import annotations 

4 

5import json 

6import sys 

7from pathlib import Path 

8 

9import click 

10 

11from .extractor import ( 

12 FormValidationError, 

13 PDFCPUError, 

14 PDFCPUExecutionError, 

15 PDFFormExtractor, 

16 PDFFormNotFoundError, 

17) 

18 

19 

def create_extractor() -> PDFFormExtractor:
    """Build the PDFFormExtractor used by the CLI commands.

    Construction failures are converted into a click error so the user sees
    a one-line message instead of a traceback.

    Returns:
        A newly constructed PDFFormExtractor.

    Raises:
        click.ClickException: If constructing the extractor raises
            PDFCPUError; the message is the text of that error.
    """
    try:
        extractor = PDFFormExtractor()
    except PDFCPUError as exc:
        raise click.ClickException(str(exc)) from exc
    return extractor

33 

34 

# Root command group; the subcommands below attach themselves via @main.command().
# The docstring doubles as the text shown by `--help`.
@click.group()
@click.version_option(version="0.1.1", prog_name="pdf-forms")
def main() -> None:
    """PDF Form extraction and manipulation tools using pdfcpu.

    This CLI provides commands to extract, list, and fill PDF forms.
    Requires pdfcpu to be installed on your system.

    Visit https://pdfcpu.io/install for installation instructions.
    """

46 

47 

@main.command()
def check() -> None:
    """Check if pdfcpu is installed and working."""
    # Builds the extractor directly (not via create_extractor) so that a
    # failure can be reported with the install hint and exit status 1.
    try:
        version = PDFFormExtractor().get_pdfcpu_version()
    except PDFCPUError as exc:
        click.echo(f"✗ pdfcpu not found: {exc}", err=True)
        click.echo("Please install pdfcpu: https://pdfcpu.io/install", err=True)
        sys.exit(1)

    click.echo(f"✓ pdfcpu is installed: {version}")

59 

60 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output JSON file path (optional, prints to stdout if not provided)",
)
def extract(pdf_path: Path, output: Path | None) -> None:
    """Extract form data from a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Examples:
        pdf-forms extract form.pdf
        pdf-forms extract form.pdf -o data.json
    """
    extractor = create_extractor()

    try:
        if not output:
            # No destination given: print the raw form data as indented JSON.
            raw = extractor.extract(pdf_path).raw_data
            click.echo(json.dumps(raw, indent=2))
            return

        # Destination given: the extractor writes the file, we only confirm.
        extractor.extract_to_json(pdf_path, output)
        click.echo(f"Form data extracted to: {output}")
    except PDFFormNotFoundError as exc:
        raise click.ClickException(str(exc)) from exc
    except PDFCPUExecutionError as exc:
        raise click.ClickException("Failed to extract form.") from exc

93 

94 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
def list_fields(pdf_path: Path) -> None:
    """List all form fields in a PDF file.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms list-fields form.pdf
    """
    # Values longer than this are shortened and suffixed with "...".
    max_value_len = 50
    extractor = create_extractor()

    try:
        fields = extractor.list_fields(pdf_path)

        if not fields:
            click.echo("No form fields found.")
            return

        # Column widths: longest entry plus two characters of padding.
        type_width = max(len(f.field_type) for f in fields) + 2
        name_width = max(len(f.name) for f in fields) + 2

        header = f"{'Type':<{type_width}} {'Name':<{name_width}} Value"

        # Render the rows up front so the separator can be sized to the table.
        rows = []
        for field in fields:
            value_str = str(field.value)
            if len(value_str) > max_value_len:
                value_str = value_str[: max_value_len - 3] + "..."
            rows.append(f"{field.field_type:<{type_width}} {field.name:<{name_width}} {value_str}")

        click.echo(header)
        # The rule spans the widest printed line (header or any row). It used
        # to be twice the header length, which matched neither.
        click.echo("=" * max(len(line) for line in (header, *rows)))

        for row in rows:
            click.echo(row)

        click.echo(f"\nTotal fields: {len(fields)}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except PDFCPUExecutionError as e:
        raise click.ClickException("Failed to list fields.") from e

136 

137 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("field_name")
def get_value(pdf_path: Path, field_name: str) -> None:
    """Get the value of a specific form field.

    PDF_PATH is the path to the PDF file to process.
    FIELD_NAME is the name of the field to retrieve.

    Example:
        pdf-forms get-value form.pdf "Candidate Name"
    """
    extractor = create_extractor()

    try:
        value = extractor.get_field_value(pdf_path, field_name)
    except PDFFormNotFoundError as exc:
        raise click.ClickException(str(exc)) from exc
    except PDFCPUExecutionError as exc:
        raise click.ClickException("Failed to get field value.") from exc

    # A None result is reported to the user as an unknown field name.
    if value is None:
        raise click.ClickException(f"Field '{field_name}' not found")

    click.echo(value)

164 

165 

@main.command()
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
def info(pdf_path: Path) -> None:
    """Check if a PDF contains a form.

    PDF_PATH is the path to the PDF file to process.

    Example:
        pdf-forms info form.pdf
    """
    extractor = create_extractor()

    try:
        form_present = extractor.has_form(pdf_path)
    except PDFCPUExecutionError as exc:
        raise click.ClickException("Failed to get form info.") from exc

    # One line of output either way; only the wording depends on the result.
    message = (
        f"{pdf_path} contains a form"
        if form_present
        else f"{pdf_path} does not contain a form"
    )
    click.echo(message)

186 

187 

@main.command("fill-form")
@click.argument("pdf_path", type=click.Path(exists=True, path_type=Path))
@click.argument("json_path", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output PDF file path (modifies input if not provided)",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate JSON data against form fields before filling (default: validate)",
)
@click.option(
    "--strict/--no-strict",
    default=False,
    help="Require all form fields to be provided (default: not strict)",
)
def fill_form(
    pdf_path: Path, json_path: Path, output: Path | None, validate: bool, strict: bool
) -> None:
    """Fill a PDF form with data from a JSON file.

    PDF_PATH is the path to the PDF form file.
    JSON_PATH is the path to the JSON file with form data.

    The JSON file must contain simple key:value pairs where keys are field names
    and values are the values to fill:

        {"Candidate Name": "John Smith", "Full time": true}

    Examples:
        pdf-forms fill-form form.pdf data.json -o filled.pdf
        pdf-forms fill-form form.pdf data.json -o filled.pdf --strict
        pdf-forms fill-form form.pdf data.json -o filled.pdf --no-validate
    """
    extractor = create_extractor()

    # Load the JSON in its own try so that read/decoding problems are reported
    # against the JSON file and never confused with extractor failures.
    try:
        with open(json_path, encoding="utf-8") as f:
            form_data = json.load(f)
    except json.JSONDecodeError as e:
        raise click.ClickException(f"Invalid JSON file: {e}") from e
    except (OSError, UnicodeDecodeError) as e:
        # e.g. JSON_PATH is a directory, is unreadable, or is not UTF-8 text.
        raise click.ClickException(f"Cannot read JSON file: {e}") from e

    # The help text promises field-name -> value pairs; reject any other
    # top-level JSON type here instead of handing it to the extractor.
    if not isinstance(form_data, dict):
        raise click.ClickException(
            "Invalid JSON file: expected a JSON object of field names and values, "
            f"got {type(form_data).__name__}"
        )

    try:
        # Validate if requested
        if validate:
            errors = extractor.validate_form_data(
                pdf_path, form_data, strict=strict, allow_extra_fields=False
            )
            if errors:
                click.echo("Validation errors:", err=True)
                for error in errors:
                    click.echo(f"  - {error}", err=True)
                raise click.ClickException("Form validation failed")

            click.echo("✓ Form data validation passed")

        # Validation (if any) already ran above, so it is disabled here.
        extractor.fill_form(pdf_path, form_data, output, validate=False)

        if output:
            click.echo(f"✓ Form filled and saved to: {output}")
        else:
            click.echo(f"✓ Form filled: {pdf_path}")

    except PDFFormNotFoundError as e:
        raise click.ClickException(str(e)) from e
    except FormValidationError as e:
        raise click.ClickException(str(e)) from e
    except PDFCPUExecutionError as e:
        raise click.ClickException("Failed to fill form.") from e
    except json.JSONDecodeError as e:
        # NOTE(review): retained from the original handler set, which also
        # covered the extractor calls; confirm whether the extractor can
        # actually raise this before removing it.
        raise click.ClickException(f"Invalid JSON file: {e}") from e

261 

262 

# Entry point when this module is executed directly: hand control to the click group.
if __name__ == "__main__":  # pragma: no cover
    main()