Coverage for src \ sec_report_kit \ parsers \ __init__.py: 100%
32 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-13 08:06 +0530
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-13 08:06 +0530
1"""Input format parsers."""
4def detect_source_type(data: dict | list) -> str:
5 """Detect whether *data* came from one of the supported scanners.
7 Returns one of:
8 ``"trivy"``, ``"pip-audit"``, ``"bandit"``, ``"gitleaks"``,
9 ``"semgrep"``, ``"codeql"``, ``"osv-scanner"``, ``"checkov"``,
10 ``"tfsec"``, or ``"trufflehog"``.
11 Raises ``ValueError`` if the format cannot be recognised.
12 """
13 if isinstance(data, list):
14 # Gitleaks commonly emits a top-level JSON array of findings.
15 if all(isinstance(item, dict) for item in data):
16 sample = data[0] if data else {}
17 if isinstance(sample, dict) and (
18 "RuleID" in sample
19 or "Description" in sample
20 or "File" in sample
21 or "StartLine" in sample
22 or "Fingerprint" in sample
23 ):
24 return "gitleaks"
25 if isinstance(sample, dict) and (
26 "DetectorName" in sample
27 or "DetectorType" in sample
28 or "SourceName" in sample
29 ):
30 return "trufflehog"
32 if isinstance(data, dict):
33 if "Results" in data:
34 return "trivy"
35 if "runs" in data and isinstance(data.get("runs"), list):
36 return "codeql"
37 if "dependencies" in data or "vulnerabilities" in data:
38 return "pip-audit"
39 if "results" in data and isinstance(data.get("results"), dict):
40 return "checkov"
41 if "results" in data and isinstance(data.get("results"), list):
42 sample = data["results"][0] if data["results"] else {}
43 if isinstance(sample, dict) and "packages" in sample:
44 return "osv-scanner"
45 if isinstance(sample, dict) and (
46 "DetectorName" in sample
47 or "DetectorType" in sample
48 or "SourceName" in sample
49 ):
50 return "trufflehog"
51 if "results" in data and isinstance(data.get("results"), list):
52 if "errors" in data or "paths" in data or "version" in data:
53 return "semgrep"
54 if all(isinstance(item, dict) and "rule_id" in item for item in data.get("results", [])):
55 return "tfsec"
56 return "bandit"
57 if "findings" in data and isinstance(data.get("findings"), list):
58 return "gitleaks"
59 raise ValueError(
60 "Cannot detect source type: JSON does not match any known format "
61 "(expected supported scanner output such as Trivy, pip-audit, Bandit, "
62 "Gitleaks, Semgrep, CodeQL SARIF, OSV-Scanner, Checkov, tfsec, or TruffleHog)."
63 )