Coverage for src / dataknobs_data / validation / schema.py: 26%

112 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-26 15:45 -0700

1"""Schema definition with fluent API for record validation. 

2""" 

3 

4from __future__ import annotations 

5 

6from dataclasses import dataclass 

7from dataclasses import field as dataclass_field 

8from typing import Any 

9 

10from dataknobs_data.fields import FieldType 

11from dataknobs_data.records import Record 

12 

13from .coercer import Coercer 

14from .constraints import Constraint, Required 

15from .result import ValidationContext, ValidationResult 

16 

17 

@dataclass
class Field:
    """Describe one field of a schema: its type, defaults and constraints.

    Note: This is not the same class as dataknobs_data.fields.Field. This one
    holds the expected structure and validation rules for a schema field.

    Attributes:
        name: Field name, used in error messages.
        field_type: Expected FieldType of the value.
        required: When True, a None value fails validation.
        default: Substituted for a None value when it is not None itself.
        constraints: Constraints checked after the type check passes.
        description: Optional free-form description.
    """

    name: str
    field_type: FieldType
    required: bool = False
    default: Any = None
    constraints: list[Constraint] = dataclass_field(default_factory=list)
    description: str | None = None

    def add_constraint(self, constraint: Constraint) -> Field:
        """Append a constraint and return this field (fluent API).

        Args:
            constraint: Constraint to append to ``constraints``

        Returns:
            Self for chaining
        """
        self.constraints.append(constraint)
        return self

    def validate(
        self,
        value: Any,
        context: ValidationContext | None = None,
        coerce: bool = False
    ) -> ValidationResult:
        """Check a single value against this field definition.

        The steps run in order: None handling, optional coercion, type
        check, then every constraint.

        Args:
            value: Value to validate
            context: Optional validation context, passed to each constraint
            coerce: If True, attempt type coercion when the type is wrong

        Returns:
            ValidationResult with outcome
        """
        # None: fail if required, fall back to the default, or accept as-is.
        if value is None:
            if self.required:
                return ValidationResult.failure(value, [f"Field '{self.name}' is required"])
            if self.default is None:
                return ValidationResult.success(None)
            value = self.default

        # Coercion is attempted only when requested and the type is wrong.
        if coerce and not self._is_correct_type(value):
            coerced = Coercer().coerce(value, self.field_type)
            if not coerced.valid:
                return coerced
            value = coerced.value

        # Type check on the (possibly coerced) value.
        if not self._is_correct_type(value):
            message = (
                f"Field '{self.name}' expects type {self.field_type.name}, got {type(value).__name__}"
            )
            return ValidationResult.failure(value, [message])

        # Run every constraint and fold the individual results together.
        outcome = ValidationResult.success(value)
        for rule in self.constraints:
            checked = rule.check(value, context)
            if not checked.valid:
                # Prefix each error with the field name for clarity.
                checked.errors = [
                    f"Field '{self.name}': {error}" for error in checked.errors
                ]
            outcome = outcome.merge(checked)

        return outcome

    def _is_correct_type(self, value: Any) -> bool:
        """Return True when value is an instance of the type(s) mapped to field_type."""
        if value is None:
            # None never fails the type check; validate() deals with it first.
            return True

        accepted_types: dict[FieldType, type | tuple[type, ...]] = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: (int, float),  # ints are accepted for floats
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: str,  # only checked as a string here
            FieldType.JSON: (dict, list),
            FieldType.BINARY: bytes,
        }

        expected = accepted_types.get(self.field_type)
        if expected is None:
            # Field types without a mapping are considered valid.
            return True
        return isinstance(value, expected)

117 

118 

class Schema:
    """Schema definition with fluent API for validation.

    Provides a clean, chainable interface for defining record schemas
    and validating records against them.
    """

    def __init__(self, name: str, strict: bool = False):
        """Initialize schema.

        Args:
            name: Schema name for identification
            strict: If True, reject records with unknown fields
        """
        self.name = name
        self.strict = strict
        self.fields: dict[str, Field] = {}
        self.description: str | None = None

    def field(
        self,
        name: str,
        field_type: FieldType | str,
        required: bool = False,
        default: Any = None,
        constraints: list[Constraint] | None = None,
        description: str | None = None
    ) -> Schema:
        """Add a field definition (fluent API).

        A field added under an existing name replaces the earlier definition.

        Args:
            name: Field name
            field_type: Field type (FieldType enum or its name as a string,
                matched case-insensitively)
            required: Whether field is required
            default: Default value if field is missing
            constraints: List of constraints to apply; the list is copied,
                so the caller's list is never modified
            description: Field description

        Returns:
            Self for chaining

        Raises:
            ValueError: If field_type is a string that names no FieldType
        """
        # Convert string to FieldType if needed
        if isinstance(field_type, str):
            try:
                field_type = FieldType[field_type.upper()]
            except KeyError as e:
                raise ValueError(f"Invalid field type: {field_type}") from e

        # Work on a copy: inserting Required below (and any later
        # Field.add_constraint call) must not leak into the caller's list,
        # which may be reused for several fields.
        field_constraints = list(constraints) if constraints else []

        # Add Required constraint if field is required
        if required and not any(isinstance(c, Required) for c in field_constraints):
            field_constraints.insert(0, Required())

        self.fields[name] = Field(
            name=name,
            field_type=field_type,
            required=required,
            default=default,
            constraints=field_constraints,
            description=description
        )
        return self

    def with_description(self, description: str) -> Schema:
        """Set schema description (fluent API).

        Args:
            description: Schema description

        Returns:
            Self for chaining
        """
        self.description = description
        return self

    def validate(
        self,
        record: Record | dict[str, Any],
        coerce: bool = False,
        context: ValidationContext | None = None
    ) -> ValidationResult:
        """Validate a record against this schema.

        On success the result carries a new Record built only from the
        schema-defined fields (with validated/coerced values), keeping the
        input record's metadata and id. On failure the result carries the
        input record and all collected errors.

        Args:
            record: Record or dict to validate
            coerce: If True, attempt type coercion
            context: Optional validation context; a fresh one is created
                when omitted

        Returns:
            ValidationResult with validation outcome
        """
        if context is None:
            context = ValidationContext()

        # Convert dict to Record if needed
        if isinstance(record, dict):
            record = Record(data=record)

        errors: list[str] = []
        warnings: list[str] = []
        validated_fields: dict[str, Any] = {}

        # Validate defined fields
        for field_name, field_def in self.fields.items():
            field_value = record.get_value(field_name)
            result = field_def.validate(field_value, context, coerce)

            if not result.valid:
                errors.extend(result.errors)
            else:
                validated_fields[field_name] = result.value

            # Warnings are collected regardless of the field's outcome.
            warnings.extend(result.warnings)

        # Check for unknown fields if strict mode
        if self.strict:
            unknown_fields = set(record.fields.keys()) - set(self.fields.keys())
            if unknown_fields:
                # Sorted so the message is deterministic (set order is not).
                errors.append(
                    f"Unknown fields in strict mode: {', '.join(sorted(unknown_fields))}"
                )

        if errors:
            return ValidationResult.failure(record, errors, warnings)

        # Create new record with validated/coerced values
        validated_record = Record(
            data=validated_fields,
            metadata=record.metadata,
            id=record.id
        )
        return ValidationResult.success(validated_record, warnings)

    def validate_many(
        self,
        records: list[Record | dict[str, Any]],
        coerce: bool = False,
        stop_on_error: bool = False
    ) -> list[ValidationResult]:
        """Validate multiple records.

        Args:
            records: List of records to validate
            coerce: If True, attempt type coercion
            stop_on_error: If True, stop validation on first error; the
                failing result is still included in the returned list

        Returns:
            List of ValidationResults, one per record processed
        """
        context = ValidationContext()  # Shared context for uniqueness checks
        results = []

        for record in records:
            result = self.validate(record, coerce, context)
            results.append(result)

            if not result.valid and stop_on_error:
                break

        return results

    def to_dict(self) -> dict[str, Any]:
        """Convert schema to dictionary representation.

        Constraints are represented only by their count per field.

        Returns:
            Dictionary representation of schema
        """
        return {
            "name": self.name,
            "strict": self.strict,
            "description": self.description,
            "fields": {
                name: {
                    "type": field_def.field_type.name,
                    "required": field_def.required,
                    "default": field_def.default,
                    "description": field_def.description,
                    "constraints": len(field_def.constraints)
                }
                for name, field_def in self.fields.items()
            }
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Schema:
        """Create schema from dictionary representation.

        Missing keys fall back to defaults: name "unnamed", strict False,
        field type "STRING", required False. Any "constraints" entry in a
        field definition is ignored, so constraints are not restored.

        Args:
            data: Dictionary with schema definition

        Returns:
            Schema instance
        """
        schema = cls(
            name=data.get("name", "unnamed"),
            strict=data.get("strict", False)
        )
        schema.description = data.get("description")

        # Add fields
        fields = data.get("fields", {})
        for field_name, field_data in fields.items():
            schema.field(
                name=field_name,
                field_type=field_data.get("type", "STRING"),
                required=field_data.get("required", False),
                default=field_data.get("default"),
                description=field_data.get("description")
            )

        return schema