Coverage for src/dataknobs_data/validation_v2/schema.py: 91%
110 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 12:29 -0500
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 12:29 -0500
1"""
2Schema definition with fluent API for record validation.
3"""
5from dataclasses import dataclass, field as dataclass_field
6from typing import Any, Dict, List, Optional, Type, Union
8from dataknobs_data.records import Record
9from dataknobs_data.fields import Field as RecordField, FieldType
11from .result import ValidationResult, ValidationContext
12from .constraints import Constraint, Required
13from .coercer import Coercer
@dataclass
class Field:
    """
    Field definition for schema validation.

    Note: This is different from dataknobs_data.fields.Field - this defines
    the expected structure and validation rules for a field in a schema.

    Attributes:
        name: Field name, used as a prefix in error messages.
        field_type: Expected FieldType of the value.
        required: If True, a None value fails validation.
        default: Value substituted for None when the field is not required
            and the default itself is not None.
        constraints: Constraints checked, in order, after type validation.
        description: Optional human-readable description.
    """

    name: str
    field_type: FieldType
    required: bool = False
    default: Any = None
    constraints: List[Constraint] = dataclass_field(default_factory=list)
    description: Optional[str] = None

    def add_constraint(self, constraint: Constraint) -> 'Field':
        """
        Add a constraint to this field (fluent API).

        Args:
            constraint: Constraint to add

        Returns:
            Self for chaining
        """
        self.constraints.append(constraint)
        return self

    def validate(
        self,
        value: Any,
        context: Optional[ValidationContext] = None,
        coerce: bool = False
    ) -> ValidationResult:
        """
        Validate a value against this field definition.

        The steps run in order: None handling (required / default), optional
        type coercion, type validation, then every constraint.

        Args:
            value: Value to validate
            context: Optional validation context, passed through to each
                constraint's ``check``
            coerce: If True, attempt type coercion when the value does not
                already have the expected type

        Returns:
            ValidationResult with outcome
        """
        # Handle None values
        if value is None:
            if self.required:
                return ValidationResult.failure(value, [f"Field '{self.name}' is required"])
            elif self.default is not None:
                # Fall through so the default itself is type-checked and
                # run through the constraints.
                value = self.default
            else:
                return ValidationResult.success(None)

        # Type coercion if requested
        if coerce and not self._is_correct_type(value):
            coercer = Coercer()
            coerce_result = coercer.coerce(value, self.field_type)
            if not coerce_result.valid:
                return coerce_result
            value = coerce_result.value

        # Type validation
        if not self._is_correct_type(value):
            return ValidationResult.failure(
                value,
                [f"Field '{self.name}' expects type {self.field_type.name}, got {type(value).__name__}"]
            )

        # Apply constraints
        result = ValidationResult.success(value)
        for constraint in self.constraints:
            check_result = constraint.check(value, context)
            if not check_result.valid:
                # Add field name to error messages for clarity
                check_result.errors = [
                    f"Field '{self.name}': {error}" for error in check_result.errors
                ]
            result = result.merge(check_result)

        return result

    def _is_correct_type(self, value: Any) -> bool:
        """
        Check if value matches expected field type.

        Args:
            value: Value to check

        Returns:
            True if the value is None, if the field type has no entry in the
            type map, or if the value is an instance of the mapped Python
            type(s); False otherwise. Booleans are not accepted for INTEGER
            or FLOAT fields.
        """
        if value is None:
            return True  # None is handled separately

        type_map = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: (int, float),  # Accept both
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: str,  # Will be validated more strictly later
            FieldType.JSON: (dict, list),
            FieldType.BINARY: bytes,
        }

        expected_type = type_map.get(self.field_type)
        if expected_type is None:
            return True  # Unknown types are considered valid

        # bool is a subclass of int, so a plain isinstance check would let
        # True/False through as numbers; reject them explicitly.
        if isinstance(value, bool) and self.field_type in (
            FieldType.INTEGER,
            FieldType.FLOAT,
        ):
            return False

        return isinstance(value, expected_type)
class Schema:
    """
    Schema definition with fluent API for validation.

    Provides a clean, chainable interface for defining record schemas
    and validating records against them.
    """

    def __init__(self, name: str, strict: bool = False):
        """
        Initialize schema.

        Args:
            name: Schema name for identification
            strict: If True, reject records with unknown fields
        """
        self.name = name
        self.strict = strict
        self.fields: Dict[str, Field] = {}
        self.description: Optional[str] = None

    def field(
        self,
        name: str,
        field_type: Union[FieldType, str],
        required: bool = False,
        default: Any = None,
        constraints: Optional[List[Constraint]] = None,
        description: Optional[str] = None
    ) -> 'Schema':
        """
        Add a field definition (fluent API).

        A field added under an existing name replaces the earlier definition.

        Args:
            name: Field name
            field_type: Field type (FieldType enum or string; strings are
                upper-cased and looked up by member name)
            required: Whether field is required
            default: Default value if field is missing
            constraints: List of constraints to apply; the list is copied,
                so the caller's list is never modified
            description: Field description

        Returns:
            Self for chaining

        Raises:
            ValueError: If ``field_type`` is a string that does not name a
                FieldType member
        """
        # Convert string to FieldType if needed
        if isinstance(field_type, str):
            try:
                field_type = FieldType[field_type.upper()]
            except KeyError as exc:
                raise ValueError(f"Invalid field type: {field_type}") from exc

        # Work on a copy: inserting Required() below (or a later
        # Field.add_constraint) must not leak into a list the caller may be
        # sharing between several field definitions.
        field_constraints = list(constraints) if constraints else []

        # Add Required constraint if field is required
        if required and not any(isinstance(c, Required) for c in field_constraints):
            field_constraints.insert(0, Required())

        self.fields[name] = Field(
            name=name,
            field_type=field_type,
            required=required,
            default=default,
            constraints=field_constraints,
            description=description
        )
        return self

    def with_description(self, description: str) -> 'Schema':
        """
        Set schema description (fluent API).

        Args:
            description: Schema description

        Returns:
            Self for chaining
        """
        self.description = description
        return self

    def validate(
        self,
        record: Union[Record, Dict[str, Any]],
        coerce: bool = False,
        context: Optional[ValidationContext] = None
    ) -> ValidationResult:
        """
        Validate a record against this schema.

        Args:
            record: Record or dict to validate
            coerce: If True, attempt type coercion
            context: Optional validation context; a fresh one is created
                when omitted

        Returns:
            ValidationResult with validation outcome. On failure it carries
            the input record together with all collected errors and warnings.
            On success it carries a new Record built from the validated
            values of the schema-defined fields only, with the input
            record's metadata and id.
        """
        if context is None:
            context = ValidationContext()

        # Convert dict to Record if needed
        if isinstance(record, dict):
            record = Record(data=record)

        errors = []
        warnings = []
        validated_fields = {}

        # Validate defined fields
        for field_name, field_def in self.fields.items():
            field_value = record.get_value(field_name)

            # Validate field
            result = field_def.validate(field_value, context, coerce)

            if not result.valid:
                errors.extend(result.errors)
            else:
                validated_fields[field_name] = result.value

            warnings.extend(result.warnings)

        # Check for unknown fields if strict mode
        if self.strict:
            unknown_fields = set(record.fields.keys()) - set(self.fields.keys())
            if unknown_fields:
                # Sorted so the message is stable across runs; set iteration
                # order is not.
                errors.append(
                    f"Unknown fields in strict mode: {', '.join(sorted(unknown_fields))}"
                )

        # Create validated record with coerced values
        if errors:
            return ValidationResult.failure(record, errors, warnings)

        # Create new record with validated/coerced values
        validated_record = Record(
            data=validated_fields,
            metadata=record.metadata,
            id=record.id
        )
        return ValidationResult.success(validated_record, warnings)

    def validate_many(
        self,
        records: List[Union[Record, Dict[str, Any]]],
        coerce: bool = False,
        stop_on_error: bool = False
    ) -> List[ValidationResult]:
        """
        Validate multiple records.

        Args:
            records: List of records to validate
            coerce: If True, attempt type coercion
            stop_on_error: If True, stop validation on first error

        Returns:
            List of ValidationResults, one per record validated. When
            ``stop_on_error`` triggers, the failing result is the last
            element and later records are not validated.
        """
        context = ValidationContext()  # Shared context for uniqueness checks
        results = []

        for record in records:
            result = self.validate(record, coerce, context)
            results.append(result)

            if not result.valid and stop_on_error:
                break

        return results

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert schema to dictionary representation.

        Constraints are represented only by their count per field, not by
        their definitions.

        Returns:
            Dictionary representation of schema
        """
        return {
            "name": self.name,
            "strict": self.strict,
            "description": self.description,
            "fields": {
                name: {
                    "type": field_def.field_type.name,
                    "required": field_def.required,
                    "default": field_def.default,
                    "description": field_def.description,
                    "constraints": len(field_def.constraints)
                }
                for name, field_def in self.fields.items()
            }
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Schema':
        """
        Create schema from dictionary representation.

        Any "constraints" entry in a field definition is ignored; only the
        Required constraint implied by ``required`` is recreated.

        Args:
            data: Dictionary with schema definition

        Returns:
            Schema instance
        """
        schema = cls(
            name=data.get("name", "unnamed"),
            strict=data.get("strict", False)
        )
        schema.description = data.get("description")

        # Add fields
        fields = data.get("fields", {})
        for field_name, field_data in fields.items():
            schema.field(
                name=field_name,
                field_type=field_data.get("type", "STRING"),
                required=field_data.get("required", False),
                default=field_data.get("default"),
                description=field_data.get("description")
            )

        return schema