Coverage for src/dataknobs_data/validation_v2/schema.py: 91%

110 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 12:29 -0500

1""" 

2Schema definition with fluent API for record validation. 

3""" 

4 

5from dataclasses import dataclass, field as dataclass_field 

6from typing import Any, Dict, List, Optional, Type, Union 

7 

8from dataknobs_data.records import Record 

9from dataknobs_data.fields import Field as RecordField, FieldType 

10 

11from .result import ValidationResult, ValidationContext 

12from .constraints import Constraint, Required 

13from .coercer import Coercer 

14 

15 

@dataclass
class Field:
    """
    Field definition for schema validation.

    Note: This is different from dataknobs_data.fields.Field - this defines
    the expected structure and validation rules for a field in a schema.

    Attributes:
        name: Field name; used as a prefix in validation error messages.
        field_type: Expected FieldType of the value.
        required: If True, a None value fails validation.
        default: Value substituted for None when the field is not required
            (only applied when the default itself is not None).
        constraints: Constraints checked, in order, after the type check.
        description: Optional human-readable description.
    """

    name: str
    field_type: FieldType
    required: bool = False
    default: Any = None
    constraints: List[Constraint] = dataclass_field(default_factory=list)
    description: Optional[str] = None

    def add_constraint(self, constraint: Constraint) -> 'Field':
        """
        Add a constraint to this field (fluent API).

        Args:
            constraint: Constraint to add

        Returns:
            Self for chaining
        """
        self.constraints.append(constraint)
        return self

    def validate(
        self,
        value: Any,
        context: Optional[ValidationContext] = None,
        coerce: bool = False
    ) -> ValidationResult:
        """
        Validate a value against this field definition.

        The steps run in order: None handling (required / default), optional
        type coercion, type check, then every constraint. Constraint checks
        do not short-circuit: all constraints are evaluated and their
        results merged.

        Args:
            value: Value to validate
            context: Optional validation context, passed through to each
                constraint's ``check``
            coerce: If True, attempt type coercion when the value is not
                already of the expected type

        Returns:
            ValidationResult with outcome
        """
        # Handle None values: the required check takes precedence over the
        # default, so a required field never falls back to its default.
        if value is None:
            if self.required:
                return ValidationResult.failure(value, [f"Field '{self.name}' is required"])
            elif self.default is not None:
                value = self.default
            else:
                return ValidationResult.success(None)

        # Type coercion if requested (skipped when the type already matches)
        if coerce and not self._is_correct_type(value):
            coercer = Coercer()
            coerce_result = coercer.coerce(value, self.field_type)
            if not coerce_result.valid:
                return coerce_result
            value = coerce_result.value

        # Type validation
        if not self._is_correct_type(value):
            return ValidationResult.failure(
                value,
                [f"Field '{self.name}' expects type {self.field_type.name}, got {type(value).__name__}"]
            )

        # Apply constraints
        result = ValidationResult.success(value)
        for constraint in self.constraints:
            check_result = constraint.check(value, context)
            if not check_result.valid:
                # Add field name to error messages for clarity
                check_result.errors = [
                    f"Field '{self.name}': {error}" for error in check_result.errors
                ]
            result = result.merge(check_result)

        return result

    def _is_correct_type(self, value: Any) -> bool:
        """
        Check if value matches expected field type.

        Args:
            value: Value to check

        Returns:
            True if the value is None, if the field type has no entry in
            the type map, or if the value is an instance of the mapped
            Python type(s); False otherwise.
        """
        if value is None:
            return True  # None is handled separately

        type_map = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: (int, float),  # Accept both
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: str,  # Will be validated more strictly later
            FieldType.JSON: (dict, list),
            FieldType.BINARY: bytes,
        }

        expected_type = type_map.get(self.field_type)
        if expected_type is None:
            return True  # Unknown types are considered valid

        # bool is a subclass of int, so a plain isinstance() check would let
        # True/False pass as INTEGER or FLOAT. Reject them explicitly.
        if isinstance(value, bool) and self.field_type in (
            FieldType.INTEGER,
            FieldType.FLOAT,
        ):
            return False

        return isinstance(value, expected_type)

118 

119 

class Schema:
    """
    Schema definition with fluent API for validation.

    Provides a clean, chainable interface for defining record schemas
    and validating records against them.

    Attributes:
        name: Schema name for identification.
        strict: If True, records containing fields not declared in the
            schema fail validation.
        fields: Mapping of field name to its Field definition.
        description: Optional schema description.
    """

    def __init__(self, name: str, strict: bool = False):
        """
        Initialize schema.

        Args:
            name: Schema name for identification
            strict: If True, reject records with unknown fields
        """
        self.name = name
        self.strict = strict
        self.fields: Dict[str, Field] = {}
        self.description: Optional[str] = None

    def field(
        self,
        name: str,
        field_type: Union[FieldType, str],
        required: bool = False,
        default: Any = None,
        constraints: Optional[List[Constraint]] = None,
        description: Optional[str] = None
    ) -> 'Schema':
        """
        Add a field definition (fluent API).

        Defining a field with a name that already exists replaces the
        earlier definition.

        Args:
            name: Field name
            field_type: Field type (FieldType enum or string; strings are
                matched case-insensitively against FieldType member names)
            required: Whether field is required
            default: Default value if field is missing
            constraints: List of constraints to apply (copied; the caller's
                list is never modified)
            description: Field description

        Returns:
            Self for chaining

        Raises:
            ValueError: If field_type is a string that does not name a
                FieldType member
        """
        # Convert string to FieldType if needed
        if isinstance(field_type, str):
            try:
                field_type = FieldType[field_type.upper()]
            except KeyError:
                # The KeyError adds nothing beyond the message below, so
                # suppress it from the traceback.
                raise ValueError(f"Invalid field type: {field_type}") from None

        # Work on a copy so that inserting Required() below, or a later
        # Field.add_constraint(), never mutates the list the caller passed in.
        field_constraints = list(constraints) if constraints else []

        # Add Required constraint if field is required
        if required and not any(isinstance(c, Required) for c in field_constraints):
            field_constraints.insert(0, Required())

        self.fields[name] = Field(
            name=name,
            field_type=field_type,
            required=required,
            default=default,
            constraints=field_constraints,
            description=description
        )
        return self

    def with_description(self, description: str) -> 'Schema':
        """
        Set schema description (fluent API).

        Args:
            description: Schema description

        Returns:
            Self for chaining
        """
        self.description = description
        return self

    def validate(
        self,
        record: Union[Record, Dict[str, Any]],
        coerce: bool = False,
        context: Optional[ValidationContext] = None
    ) -> ValidationResult:
        """
        Validate a record against this schema.

        Every declared field is validated (no short-circuit), so the
        result carries the errors and warnings of all fields. On success
        the result holds a new Record built only from the fields declared
        in this schema, with the input record's metadata and id.

        Args:
            record: Record or dict to validate
            coerce: If True, attempt type coercion
            context: Optional validation context; a fresh one is created
                when omitted

        Returns:
            ValidationResult with validation outcome
        """
        if context is None:
            context = ValidationContext()

        # Convert dict to Record if needed
        if isinstance(record, dict):
            record = Record(data=record)

        errors = []
        warnings = []
        validated_fields = {}

        # Validate defined fields
        for field_name, field_def in self.fields.items():
            field_value = record.get_value(field_name)

            # Validate field
            result = field_def.validate(field_value, context, coerce)

            if not result.valid:
                errors.extend(result.errors)
            else:
                validated_fields[field_name] = result.value

            warnings.extend(result.warnings)

        # Check for unknown fields if strict mode
        if self.strict:
            unknown_fields = set(record.fields.keys()) - set(self.fields)
            if unknown_fields:
                # Sort so the error message is deterministic across runs.
                errors.append(
                    f"Unknown fields in strict mode: {', '.join(sorted(unknown_fields))}"
                )

        if errors:
            return ValidationResult.failure(record, errors, warnings)

        # Create new record with validated/coerced values
        validated_record = Record(
            data=validated_fields,
            metadata=record.metadata,
            id=record.id
        )
        return ValidationResult.success(validated_record, warnings)

    def validate_many(
        self,
        records: List[Union[Record, Dict[str, Any]]],
        coerce: bool = False,
        stop_on_error: bool = False
    ) -> List[ValidationResult]:
        """
        Validate multiple records.

        A single ValidationContext is shared across all records in the
        call.

        Args:
            records: List of records to validate
            coerce: If True, attempt type coercion
            stop_on_error: If True, stop validation on first error; the
                failing result is still included in the returned list

        Returns:
            List of ValidationResults, one per record processed
        """
        context = ValidationContext()  # Shared context for uniqueness checks
        results = []

        for record in records:
            result = self.validate(record, coerce, context)
            results.append(result)

            if not result.valid and stop_on_error:
                break

        return results

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert schema to dictionary representation.

        Constraints are summarized as a count only, so the output does not
        carry enough information to rebuild them.

        Returns:
            Dictionary representation of schema
        """
        return {
            "name": self.name,
            "strict": self.strict,
            "description": self.description,
            "fields": {
                name: {
                    "type": field_def.field_type.name,
                    "required": field_def.required,
                    "default": field_def.default,
                    "description": field_def.description,
                    "constraints": len(field_def.constraints)
                }
                for name, field_def in self.fields.items()
            }
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Schema':
        """
        Create schema from dictionary representation.

        Missing keys fall back to defaults: name "unnamed", strict False,
        field type "STRING", required False. Any "constraints" entry in a
        field definition is ignored.

        Args:
            data: Dictionary with schema definition

        Returns:
            Schema instance

        Raises:
            ValueError: If a field's "type" does not name a FieldType member
        """
        schema = cls(
            name=data.get("name", "unnamed"),
            strict=data.get("strict", False)
        )
        schema.description = data.get("description")

        # Add fields
        fields = data.get("fields", {})
        for field_name, field_data in fields.items():
            schema.field(
                name=field_name,
                field_type=field_data.get("type", "STRING"),
                required=field_data.get("required", False),
                default=field_data.get("default"),
                description=field_data.get("description")
            )

        return schema