Coverage for src/csv_schema_validator/validate_csv.py: 19%

21 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-20 12:34 +0200

1import csv 

2 

3from .validator import FieldValidator 

4from .schema_validator import validate_schema_structure 

5 

6 

def validate_csv(csv_file: str, schema: dict) -> dict:
    """
    Validate a CSV file against a JSON schema.

    The schema structure is checked first; if it is invalid, the result of
    ``validate_schema_structure`` is returned as-is and the CSV file is never
    opened. Otherwise the header row is checked for the required field names
    and every following row is validated against the schema.

    Args:
        csv_file: The path to the CSV file to validate.
        schema: The JSON schema to validate the CSV file against. It is read
            through ``schema["fields"]``, where each field provides a
            ``"name"`` and a ``"required"`` entry.

    Returns:
        A dictionary containing the validation results.
        The dictionary contains the following keys:
        - is_valid: A boolean indicating if the CSV file is valid.
        - errors: A list of errors found in the CSV file.
            Each error is a dictionary containing the following keys:
            - error_type: The type of error.
            - details: A dictionary containing the details of the error.
            The ``empty_csv_file`` error produced here additionally carries
            ``value`` (the file path), ``row`` and ``column`` (both -1).

    Raises:
        OSError: If the CSV file cannot be opened (e.g. it does not exist).
    """
    schema_validation_result = validate_schema_structure(schema)
    if not schema_validation_result["is_valid"]:
        return schema_validation_result

    # NOTE(review): assumes validate_schema_structure guarantees that every
    # field has both "name" and "required" keys -- confirm against it.
    required_field_names = [
        field["name"] for field in schema["fields"] if field["required"]
    ]

    validation_results = []

    # newline="" is what the csv module documentation requires: without it,
    # line breaks embedded inside quoted fields are not interpreted correctly.
    with open(csv_file, "r", newline="") as file:
        reader = csv.reader(file)
        try:
            header = next(reader)
        except StopIteration:
            # No header row at all: the file is empty.
            return {
                "is_valid": False,
                "errors": [
                    {
                        "error_type": "empty_csv_file",
                        "value": csv_file,
                        "row": -1,
                        "column": -1,
                        "details": {},
                    }
                ],
            }

        # The header check yields a single result, hence append.
        validation_results.append(
            FieldValidator.validate_required_fields(header, required_field_names)
        )

        # Each row yields a list of results, hence +=. row_number is the
        # zero-based index of the data row (the header is not counted).
        for row_number, row in enumerate(reader):
            validation_results += FieldValidator.validate_row(
                row, header, schema, row_number
            )

    # Flatten the errors of every failed result into one list.
    errors = [
        error
        for result in validation_results
        if not result["is_valid"]
        for error in result["errors"]
    ]

    return {
        "is_valid": not errors,
        "errors": errors,
    }