yaml_shredder.schema_generator
Automatic JSON Schema generation from YAML/JSON files.
"""Automatic JSON Schema generation from YAML/JSON files."""

import json
from datetime import date, datetime
from pathlib import Path
from typing import Any

import yaml
from genson import SchemaBuilder

# Lower-cased file suffixes recognised by generate_schema_from_directory.
_YAML_SUFFIXES = (".yaml", ".yml")
_JSON_SUFFIXES = (".json",)


class SchemaGenerator:
    """Generate JSON Schema from multiple YAML/JSON examples.

    Every example is normalized with ``_normalize_data`` and then handed to a
    single genson ``SchemaBuilder``; ``generate_schema`` returns whatever that
    builder reports.
    """

    def __init__(self):
        """Initialize the schema generator."""
        # Receives every example passed to the add_* methods.
        self.builder = SchemaBuilder()
        # String paths of the files loaded so far, in the order they were added.
        self.files_processed: list[str] = []

    def _normalize_data(self, obj: Any) -> Any:
        """
        Normalize data by converting date/datetime objects to strings.

        Dicts and lists are rebuilt recursively (dict keys are left untouched);
        any other value is returned unchanged.

        Args:
            obj: Data to normalize

        Returns:
            Normalized data, with date/datetime values replaced by ``isoformat()`` strings
        """
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        if isinstance(obj, dict):
            return {key: self._normalize_data(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [self._normalize_data(item) for item in obj]
        return obj

    def add_yaml_file(self, file_path: str | Path) -> None:
        """
        Add a YAML file to the schema builder.

        The file is parsed with ``yaml.safe_load``, normalized, and passed to the
        builder; its path is then recorded in ``files_processed``.

        Args:
            file_path: Path to YAML file
        """
        file_path = Path(file_path)
        # Decode as UTF-8 explicitly instead of depending on the platform locale.
        with open(file_path, encoding="utf-8") as f:
            data = yaml.safe_load(f)

        normalized_data = self._normalize_data(data)
        self.builder.add_object(normalized_data)
        self.files_processed.append(str(file_path))

    def add_json_file(self, file_path: str | Path) -> None:
        """
        Add a JSON file to the schema builder.

        The file is parsed with ``json.load``, normalized, and passed to the
        builder; its path is then recorded in ``files_processed``.

        Args:
            file_path: Path to JSON file
        """
        file_path = Path(file_path)
        # Decode as UTF-8 explicitly instead of depending on the platform locale.
        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)

        normalized_data = self._normalize_data(data)
        self.builder.add_object(normalized_data)
        self.files_processed.append(str(file_path))

    def add_object(self, obj: dict[str, Any]) -> None:
        """
        Add a Python object to the schema builder.

        Unlike the file-based methods, nothing is recorded in ``files_processed``.

        Args:
            obj: Dictionary object to add
        """
        normalized_data = self._normalize_data(obj)
        self.builder.add_object(normalized_data)

    def generate_schema(self) -> dict[str, Any]:
        """
        Generate the JSON schema from all added examples.

        Returns:
            JSON schema as dictionary
        """
        return self.builder.to_schema()

    def save_schema(self, output_path: str | Path) -> None:
        """
        Save the generated schema to a file.

        The schema is generated before the file is opened, then written as JSON
        with an indent of 2.

        Args:
            output_path: Path where to save the schema
        """
        schema = self.generate_schema()
        output_path = Path(output_path)

        with open(output_path, "w") as f:
            json.dump(schema, f, indent=2)

    def get_stats(self) -> dict[str, Any]:
        """
        Get statistics about the schema generation process.

        Returns:
            Dictionary with the number of files processed, the list of their
            paths (the same list object held by this instance), and the number
            of entries under the schema's top-level "properties" and "required" keys
        """
        schema = self.generate_schema()
        return {
            "files_processed": len(self.files_processed),
            "file_list": self.files_processed,
            "schema_properties": len(schema.get("properties", {})),
            "required_fields": len(schema.get("required", [])),
        }


def generate_schema_from_directory(
    directory: str | Path, pattern: str = "*.yaml", output_file: str | Path | None = None
) -> dict[str, Any]:
    """
    Generate schema from all matching files in a directory.

    The directory is searched recursively. Each matching file is loaded
    according to its own suffix (.yaml/.yml as YAML, .json as JSON, compared
    case-insensitively); files with any other suffix are skipped.

    Args:
        directory: Directory to scan
        pattern: File pattern to match (default: *.yaml)
        output_file: Optional path to save schema

    Returns:
        Generated JSON schema

    Raises:
        ValueError: If no regular file under ``directory`` matches ``pattern``
    """
    directory = Path(directory)
    generator = SchemaGenerator()

    # rglob may also yield directories whose name matches the pattern; only
    # regular files can be opened and parsed.
    files = sorted(path for path in directory.rglob(pattern) if path.is_file())

    if not files:
        raise ValueError(f"No files matching '{pattern}' found in {directory}")

    # Choose the loader from each file's suffix rather than from the pattern
    # text, so patterns such as "*" or "*.YAML" do not silently load nothing.
    for file_path in files:
        suffix = file_path.suffix.lower()
        if suffix in _YAML_SUFFIXES:
            generator.add_yaml_file(file_path)
        elif suffix in _JSON_SUFFIXES:
            generator.add_json_file(file_path)

    # Generate and optionally save schema
    schema = generator.generate_schema()

    if output_file:
        generator.save_schema(output_file)

    # Print statistics
    stats = generator.get_stats()
    print("Schema generation complete:")
    print(f" Files processed: {stats['files_processed']}")
    print(f" Properties found: {stats['schema_properties']}")
    print(f" Required fields: {stats['required_fields']}")

    return schema
class
SchemaGenerator:
13class SchemaGenerator: 14 """Generate JSON Schema from multiple YAML/JSON examples.""" 15 16 def __init__(self): 17 """Initialize the schema generator.""" 18 self.builder = SchemaBuilder() 19 self.files_processed = [] 20 21 def _normalize_data(self, obj: Any) -> Any: 22 """ 23 Normalize data by converting datetime objects to strings. 24 25 Args: 26 obj: Data to normalize 27 28 Returns: 29 Normalized data 30 """ 31 if isinstance(obj, (datetime, date)): 32 return obj.isoformat() 33 elif isinstance(obj, dict): 34 return {k: self._normalize_data(v) for k, v in obj.items()} 35 elif isinstance(obj, list): 36 return [self._normalize_data(item) for item in obj] 37 else: 38 return obj 39 40 def add_yaml_file(self, file_path: str | Path) -> None: 41 """ 42 Add a YAML file to the schema builder. 43 44 Args: 45 file_path: Path to YAML file 46 """ 47 file_path = Path(file_path) 48 with open(file_path) as f: 49 data = yaml.safe_load(f) 50 51 normalized_data = self._normalize_data(data) 52 self.builder.add_object(normalized_data) 53 self.files_processed.append(str(file_path)) 54 55 def add_json_file(self, file_path: str | Path) -> None: 56 """ 57 Add a JSON file to the schema builder. 58 59 Args: 60 file_path: Path to JSON file 61 """ 62 file_path = Path(file_path) 63 with open(file_path) as f: 64 data = json.load(f) 65 66 normalized_data = self._normalize_data(data) 67 self.builder.add_object(normalized_data) 68 self.files_processed.append(str(file_path)) 69 70 def add_object(self, obj: dict[str, Any]) -> None: 71 """ 72 Add a Python object to the schema builder. 73 74 Args: 75 obj: Dictionary object to add 76 """ 77 normalized_data = self._normalize_data(obj) 78 self.builder.add_object(normalized_data) 79 80 def generate_schema(self) -> dict[str, Any]: 81 """ 82 Generate the JSON schema from all added examples. 
83 84 Returns: 85 JSON schema as dictionary 86 """ 87 return self.builder.to_schema() 88 89 def save_schema(self, output_path: str | Path) -> None: 90 """ 91 Save the generated schema to a file. 92 93 Args: 94 output_path: Path where to save the schema 95 """ 96 schema = self.generate_schema() 97 output_path = Path(output_path) 98 99 with open(output_path, "w") as f: 100 json.dump(schema, f, indent=2) 101 102 def get_stats(self) -> dict[str, Any]: 103 """ 104 Get statistics about the schema generation process. 105 106 Returns: 107 Dictionary with statistics 108 """ 109 schema = self.generate_schema() 110 return { 111 "files_processed": len(self.files_processed), 112 "file_list": self.files_processed, 113 "schema_properties": len(schema.get("properties", {})), 114 "required_fields": len(schema.get("required", [])), 115 }
Generate JSON Schema from multiple YAML/JSON examples.
SchemaGenerator()
def __init__(self) -> None:
    """Create a generator that has seen no examples and loaded no files."""
    # Receives every example passed to the add_* methods.
    self.builder = SchemaBuilder()
    # String paths of loaded files, appended to by the file-based add methods.
    self.files_processed: list[str] = []
Initialize the schema generator.
def
add_yaml_file(self, file_path: str | pathlib.Path) -> None:
def add_yaml_file(self, file_path: str | Path) -> None:
    """
    Add a YAML file to the schema builder.

    The file is parsed with ``yaml.safe_load``, normalized via
    ``_normalize_data``, and passed to the builder; its path is then
    recorded in ``files_processed``.

    Args:
        file_path: Path to YAML file
    """
    file_path = Path(file_path)
    # Decode as UTF-8 explicitly instead of depending on the platform locale.
    with open(file_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    normalized_data = self._normalize_data(data)
    self.builder.add_object(normalized_data)
    self.files_processed.append(str(file_path))
Add a YAML file to the schema builder.
Arguments:
- file_path: Path to YAML file
def
add_json_file(self, file_path: str | pathlib.Path) -> None:
55 def add_json_file(self, file_path: str | Path) -> None: 56 """ 57 Add a JSON file to the schema builder. 58 59 Args: 60 file_path: Path to JSON file 61 """ 62 file_path = Path(file_path) 63 with open(file_path) as f: 64 data = json.load(f) 65 66 normalized_data = self._normalize_data(data) 67 self.builder.add_object(normalized_data) 68 self.files_processed.append(str(file_path))
Add a JSON file to the schema builder.
Arguments:
- file_path: Path to JSON file
def
add_object(self, obj: dict[str, typing.Any]) -> None:
def add_object(self, obj: dict[str, Any]) -> None:
    """
    Feed an in-memory object to the schema builder.

    The object is first passed through ``_normalize_data``; nothing is
    recorded in ``files_processed``.

    Args:
        obj: Dictionary object to add
    """
    self.builder.add_object(self._normalize_data(obj))
Add a Python object to the schema builder.
Arguments:
- obj: Dictionary object to add
def
generate_schema(self) -> dict[str, typing.Any]:
def generate_schema(self) -> dict[str, Any]:
    """
    Build the JSON schema from every example added so far.

    Returns:
        The dictionary produced by the builder's ``to_schema()``
    """
    schema = self.builder.to_schema()
    return schema
Generate the JSON schema from all added examples.
Returns:
JSON schema as dictionary
def
save_schema(self, output_path: str | pathlib.Path) -> None:
89 def save_schema(self, output_path: str | Path) -> None: 90 """ 91 Save the generated schema to a file. 92 93 Args: 94 output_path: Path where to save the schema 95 """ 96 schema = self.generate_schema() 97 output_path = Path(output_path) 98 99 with open(output_path, "w") as f: 100 json.dump(schema, f, indent=2)
Save the generated schema to a file.
Arguments:
- output_path: Path where to save the schema
def
get_stats(self) -> dict[str, typing.Any]:
def get_stats(self) -> dict[str, Any]:
    """
    Summarize what has been processed and what the current schema contains.

    Returns:
        Dictionary with the number of files processed, the list of their
        paths (the same list object held by this instance), and the number
        of entries under the schema's top-level "properties" and "required" keys
    """
    schema = self.generate_schema()
    processed = self.files_processed
    property_count = len(schema.get("properties", {}))
    required_count = len(schema.get("required", []))
    return {
        "files_processed": len(processed),
        "file_list": processed,
        "schema_properties": property_count,
        "required_fields": required_count,
    }
Get statistics about the schema generation process.
Returns:
Dictionary with statistics
def
generate_schema_from_directory(directory: str | pathlib.Path, pattern: str = '*.yaml', output_file: str | pathlib.Path | None = None) -> dict[str, typing.Any]:
def generate_schema_from_directory(
    directory: str | Path, pattern: str = "*.yaml", output_file: str | Path | None = None
) -> dict[str, Any]:
    """
    Generate schema from all matching files in a directory.

    The directory is searched recursively. Each matching file is loaded
    according to its own suffix (.yaml/.yml as YAML, .json as JSON, compared
    case-insensitively); files with any other suffix are skipped.

    Args:
        directory: Directory to scan
        pattern: File pattern to match (default: *.yaml)
        output_file: Optional path to save schema

    Returns:
        Generated JSON schema

    Raises:
        ValueError: If no regular file under ``directory`` matches ``pattern``
    """
    directory = Path(directory)
    generator = SchemaGenerator()

    # rglob may also yield directories whose name matches the pattern; only
    # regular files can be opened and parsed.
    files = sorted(path for path in directory.rglob(pattern) if path.is_file())

    if not files:
        raise ValueError(f"No files matching '{pattern}' found in {directory}")

    # Choose the loader from each file's suffix rather than from the pattern
    # text, so patterns such as "*" or "*.YAML" do not silently load nothing.
    for file_path in files:
        suffix = file_path.suffix.lower()
        if suffix in (".yaml", ".yml"):
            generator.add_yaml_file(file_path)
        elif suffix == ".json":
            generator.add_json_file(file_path)

    # Generate and optionally save schema
    schema = generator.generate_schema()

    if output_file:
        generator.save_schema(output_file)

    # Print statistics
    stats = generator.get_stats()
    print("Schema generation complete:")
    print(f" Files processed: {stats['files_processed']}")
    print(f" Properties found: {stats['schema_properties']}")
    print(f" Required fields: {stats['required_fields']}")

    return schema
Generate schema from all matching files in a directory.
Arguments:
- directory: Directory to scan
- pattern: File pattern to match (default: *.yaml)
- output_file: Optional path to save schema
Returns:
Generated JSON schema