Coverage for src \ sec_report_kit \ parsers \ __init__.py: 100%

32 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-13 08:06 +0530

1"""Input format parsers.""" 

2 

3 

4def detect_source_type(data: dict | list) -> str: 

5 """Detect whether *data* came from one of the supported scanners. 

6 

7 Returns one of: 

8 ``"trivy"``, ``"pip-audit"``, ``"bandit"``, ``"gitleaks"``, 

9 ``"semgrep"``, ``"codeql"``, ``"osv-scanner"``, ``"checkov"``, 

10 ``"tfsec"``, or ``"trufflehog"``. 

11 Raises ``ValueError`` if the format cannot be recognised. 

12 """ 

13 if isinstance(data, list): 

14 # Gitleaks commonly emits a top-level JSON array of findings. 

15 if all(isinstance(item, dict) for item in data): 

16 sample = data[0] if data else {} 

17 if isinstance(sample, dict) and ( 

18 "RuleID" in sample 

19 or "Description" in sample 

20 or "File" in sample 

21 or "StartLine" in sample 

22 or "Fingerprint" in sample 

23 ): 

24 return "gitleaks" 

25 if isinstance(sample, dict) and ( 

26 "DetectorName" in sample 

27 or "DetectorType" in sample 

28 or "SourceName" in sample 

29 ): 

30 return "trufflehog" 

31 

32 if isinstance(data, dict): 

33 if "Results" in data: 

34 return "trivy" 

35 if "runs" in data and isinstance(data.get("runs"), list): 

36 return "codeql" 

37 if "dependencies" in data or "vulnerabilities" in data: 

38 return "pip-audit" 

39 if "results" in data and isinstance(data.get("results"), dict): 

40 return "checkov" 

41 if "results" in data and isinstance(data.get("results"), list): 

42 sample = data["results"][0] if data["results"] else {} 

43 if isinstance(sample, dict) and "packages" in sample: 

44 return "osv-scanner" 

45 if isinstance(sample, dict) and ( 

46 "DetectorName" in sample 

47 or "DetectorType" in sample 

48 or "SourceName" in sample 

49 ): 

50 return "trufflehog" 

51 if "results" in data and isinstance(data.get("results"), list): 

52 if "errors" in data or "paths" in data or "version" in data: 

53 return "semgrep" 

54 if all(isinstance(item, dict) and "rule_id" in item for item in data.get("results", [])): 

55 return "tfsec" 

56 return "bandit" 

57 if "findings" in data and isinstance(data.get("findings"), list): 

58 return "gitleaks" 

59 raise ValueError( 

60 "Cannot detect source type: JSON does not match any known format " 

61 "(expected supported scanner output such as Trivy, pip-audit, Bandit, " 

62 "Gitleaks, Semgrep, CodeQL SARIF, OSV-Scanner, Checkov, tfsec, or TruffleHog)." 

63 )