Coverage for src/dataknobs_data/validation/schema.py: 26%
112 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 14:14 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 14:14 -0600
1"""Schema definition with fluent API for record validation.
2"""
4from __future__ import annotations
6from dataclasses import dataclass
7from dataclasses import field as dataclass_field
8from typing import Any
10from dataknobs_data.fields import FieldType
11from dataknobs_data.records import Record
13from .coercer import Coercer
14from .constraints import Constraint, Required
15from .result import ValidationContext, ValidationResult
@dataclass
class Field:
    """Field definition for schema validation.

    Note: This is different from dataknobs_data.fields.Field - this defines
    the expected structure and validation rules for a field in a schema.

    Attributes:
        name: Field name, used in error messages.
        field_type: Expected type of the field's value.
        required: If True, a None value fails validation.
        default: Value substituted for None when the field is not required
            (only used when the default itself is not None).
        constraints: Constraints checked, in order, after type validation.
        description: Optional human-readable description.
    """

    name: str
    field_type: FieldType
    required: bool = False
    default: Any = None
    constraints: list[Constraint] = dataclass_field(default_factory=list)
    description: str | None = None

    def add_constraint(self, constraint: Constraint) -> Field:
        """Add a constraint to this field (fluent API).

        Args:
            constraint: Constraint to add

        Returns:
            Self for chaining
        """
        self.constraints.append(constraint)
        return self

    def validate(
        self,
        value: Any,
        context: ValidationContext | None = None,
        coerce: bool = False
    ) -> ValidationResult:
        """Validate a value against this field definition.

        The checks run in order: None handling (required / default),
        optional type coercion, type validation, then every constraint.

        Args:
            value: Value to validate
            context: Optional validation context, passed to each constraint
            coerce: If True, attempt type coercion when the type does not match

        Returns:
            ValidationResult with outcome
        """
        # Handle None values
        if value is None:
            if self.required:
                return ValidationResult.failure(value, [f"Field '{self.name}' is required"])
            elif self.default is not None:
                # The default goes through the same type/constraint checks below
                value = self.default
            else:
                return ValidationResult.success(None)

        # Type coercion if requested (skipped when the type already matches)
        if coerce and not self._is_correct_type(value):
            coercer = Coercer()
            coerce_result = coercer.coerce(value, self.field_type)
            if not coerce_result.valid:
                return coerce_result
            value = coerce_result.value

        # Type validation
        if not self._is_correct_type(value):
            return ValidationResult.failure(
                value,
                [f"Field '{self.name}' expects type {self.field_type.name}, got {type(value).__name__}"]
            )

        # Apply constraints, accumulating every failure rather than stopping early
        result = ValidationResult.success(value)
        for constraint in self.constraints:
            check_result = constraint.check(value, context)
            if not check_result.valid:
                # Add field name to error messages for clarity
                check_result.errors = [
                    f"Field '{self.name}': {error}" for error in check_result.errors
                ]
            result = result.merge(check_result)

        return result

    def _is_correct_type(self, value: Any) -> bool:
        """Check if value matches expected field type.

        Args:
            value: Value to check

        Returns:
            True if the value is acceptable for ``self.field_type``
        """
        if value is None:
            return True  # None is handled separately

        # bool is a subclass of int, so a plain isinstance() check would let
        # True/False through as valid INTEGER or FLOAT values.
        if isinstance(value, bool) and self.field_type in (
            FieldType.INTEGER,
            FieldType.FLOAT,
        ):
            return False

        type_map: dict[FieldType, type | tuple[type, ...]] = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: (int, float),  # Accept both
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: str,  # Will be validated more strictly later
            FieldType.JSON: (dict, list),
            FieldType.BINARY: bytes,
        }

        expected_type = type_map.get(self.field_type)
        if expected_type:
            return isinstance(value, expected_type)
        return True  # Unknown types are considered valid
class Schema:
    """Schema definition with fluent API for validation.

    Provides a clean, chainable interface for defining record schemas
    and validating records against them.
    """

    def __init__(self, name: str, strict: bool = False):
        """Initialize schema.

        Args:
            name: Schema name for identification
            strict: If True, reject records with unknown fields
        """
        self.name = name
        self.strict = strict
        self.fields: dict[str, Field] = {}
        self.description: str | None = None

    def field(
        self,
        name: str,
        field_type: FieldType | str,
        required: bool = False,
        default: Any = None,
        constraints: list[Constraint] | None = None,
        description: str | None = None
    ) -> Schema:
        """Add a field definition (fluent API).

        A field added under an existing name replaces the earlier definition.

        Args:
            name: Field name
            field_type: Field type (FieldType enum or string, case-insensitive)
            required: Whether field is required
            default: Default value if field is missing
            constraints: List of constraints to apply; the list is copied,
                so the caller's list is never modified
            description: Field description

        Returns:
            Self for chaining

        Raises:
            ValueError: If ``field_type`` is a string that names no FieldType
        """
        # Convert string to FieldType if needed
        if isinstance(field_type, str):
            try:
                field_type = FieldType[field_type.upper()]
            except KeyError as e:
                raise ValueError(f"Invalid field type: {field_type}") from e

        # Work on a copy: inserting Required() below must not mutate the
        # caller's list, and the stored Field must not alias it either.
        field_constraints = list(constraints) if constraints else []

        # Add Required constraint if field is required
        if required and not any(isinstance(c, Required) for c in field_constraints):
            field_constraints.insert(0, Required())

        self.fields[name] = Field(
            name=name,
            field_type=field_type,
            required=required,
            default=default,
            constraints=field_constraints,
            description=description
        )
        return self

    def with_description(self, description: str) -> Schema:
        """Set schema description (fluent API).

        Args:
            description: Schema description

        Returns:
            Self for chaining
        """
        self.description = description
        return self

    def validate(
        self,
        record: Record | dict[str, Any],
        coerce: bool = False,
        context: ValidationContext | None = None
    ) -> ValidationResult:
        """Validate a record against this schema.

        On success the result carries a new Record built only from the
        schema's defined fields (with validated/coerced values); on failure
        it carries the input record together with all collected errors.

        Args:
            record: Record or dict to validate
            coerce: If True, attempt type coercion
            context: Optional validation context

        Returns:
            ValidationResult with validation outcome
        """
        if context is None:
            context = ValidationContext()

        # Convert dict to Record if needed
        if isinstance(record, dict):
            record = Record(data=record)

        errors = []
        warnings = []
        validated_fields = {}

        # Validate defined fields
        for field_name, field_def in self.fields.items():
            field_value = record.get_value(field_name)

            # Validate field
            result = field_def.validate(field_value, context, coerce)

            if not result.valid:
                errors.extend(result.errors)
            else:
                validated_fields[field_name] = result.value

            # Warnings are kept whether or not the field was valid
            warnings.extend(result.warnings)

        # Check for unknown fields if strict mode
        if self.strict:
            unknown_fields = set(record.fields.keys()) - set(self.fields.keys())
            if unknown_fields:
                # Sort so the message is deterministic (set order is not)
                errors.append(
                    f"Unknown fields in strict mode: {', '.join(sorted(unknown_fields))}"
                )

        # Create validated record with coerced values
        if errors:
            return ValidationResult.failure(record, errors, warnings)

        # Create new record with validated/coerced values
        validated_record = Record(
            data=validated_fields,
            metadata=record.metadata,
            id=record.id
        )
        return ValidationResult.success(validated_record, warnings)

    def validate_many(
        self,
        records: list[Record | dict[str, Any]],
        coerce: bool = False,
        stop_on_error: bool = False
    ) -> list[ValidationResult]:
        """Validate multiple records.

        Args:
            records: List of records to validate
            coerce: If True, attempt type coercion
            stop_on_error: If True, stop validation on first error; the
                failing result is still included in the returned list

        Returns:
            List of ValidationResults, one per record processed
        """
        context = ValidationContext()  # Shared context for uniqueness checks
        results = []

        for record in records:
            result = self.validate(record, coerce, context)
            results.append(result)

            if not result.valid and stop_on_error:
                break

        return results

    def to_dict(self) -> dict[str, Any]:
        """Convert schema to dictionary representation.

        Constraints are summarised by their count only, not serialized.

        Returns:
            Dictionary representation of schema
        """
        return {
            "name": self.name,
            "strict": self.strict,
            "description": self.description,
            "fields": {
                name: {
                    "type": field_def.field_type.name,
                    "required": field_def.required,
                    "default": field_def.default,
                    "description": field_def.description,
                    "constraints": len(field_def.constraints)
                }
                for name, field_def in self.fields.items()
            }
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Schema:
        """Create schema from dictionary representation.

        Any "constraints" entry in the field data is ignored; only the
        Required constraint implied by ``required`` is re-created.

        Args:
            data: Dictionary with schema definition

        Returns:
            Schema instance
        """
        schema = cls(
            name=data.get("name", "unnamed"),
            strict=data.get("strict", False)
        )
        schema.description = data.get("description")

        # Add fields
        fields = data.get("fields", {})
        for field_name, field_data in fields.items():
            schema.field(
                name=field_name,
                field_type=field_data.get("type", "STRING"),
                required=field_data.get("required", False),
                default=field_data.get("default"),
                description=field_data.get("description")
            )

        return schema