Coverage for src/dataknobs_data/validation/coercer.py: 9%

137 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 14:14 -0600

1"""Type coercion with predictable, consistent behavior. 

2""" 

3 

4from __future__ import annotations 

5 

6import json 

7from datetime import datetime 

8from typing import Any 

9 

10from dataknobs_data.fields import FieldType 

11 

12from .result import ValidationResult 

13 

14 

class Coercer:
    """Type coercion with predictable results.

    Always returns ValidationResult, never raises exceptions.
    Provides clear error messages when coercion fails.
    """

    # Explicit string layouts tried, in order, before falling back to
    # ``datetime.fromisoformat``. Order matters: the day-first forms come
    # before the month-first ones, so an ambiguous "01/02/2024" parses as
    # 1 February. The layouts ending in a literal "Z" yield *naive* datetimes.
    _DATETIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%d-%m-%Y',
        '%m/%d/%Y',
        '%m-%d-%Y',
    )

    # Literal prefixes (compared case-insensitively) and the base they select.
    _INT_PREFIX_BASES = {'0x': 16, '0o': 8, '0b': 2}

    # Strings (after strip + lower) accepted as booleans.
    _TRUE_STRINGS = ('true', '1', 'yes', 'y', 'on')
    _FALSE_STRINGS = ('false', '0', 'no', 'n', 'off')

    def coerce(
        self,
        value: Any,
        target_type: type | FieldType
    ) -> ValidationResult:
        """Coerce a value to the target type.

        Values that already satisfy ``isinstance(value, target_type)`` are
        returned unchanged. Because ``bool`` is a subclass of ``int``, a
        ``True``/``False`` value therefore passes through as-is for an
        ``int`` target.

        Args:
            value: Value to coerce
            target_type: Target type (Python type or FieldType enum)

        Returns:
            ValidationResult with coerced value or error
        """
        # None is never coercible; report it against the requested type.
        if value is None:
            return ValidationResult.failure(
                None,
                [f"Cannot coerce None to {self._type_name(target_type)}"]
            )

        # Convert FieldType to Python type
        if isinstance(target_type, FieldType):
            target_type = self._field_type_to_python(target_type)

        # The isinstance() check lives inside the try block on purpose: it
        # raises TypeError when target_type is not a valid class-or-tuple
        # (e.g. a parameterized generic), and this method must not raise.
        try:
            if isinstance(value, target_type):
                return ValidationResult.success(value)
            return ValidationResult.success(self._coerce_value(value, target_type))
        except Exception as e:
            return ValidationResult.failure(
                value,
                [f"Cannot coerce {type(value).__name__} to {self._type_name(target_type)}: {e!s}"]
            )

    def _field_type_to_python(self, field_type: FieldType) -> type:
        """Convert FieldType enum to Python type.

        Field types missing from the table map to ``object``, which every
        value is an instance of, so such values pass through ``coerce``
        unchanged.
        """
        type_map: dict[FieldType, type] = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: float,
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: datetime,
            FieldType.JSON: dict,  # Using dict as primary type for JSON
            FieldType.BINARY: bytes,
        }
        return type_map.get(field_type, object)

    def _type_name(self, target_type: type | FieldType | tuple[type, ...]) -> str:
        """Get readable name for type."""
        if isinstance(target_type, FieldType):
            return target_type.name
        if isinstance(target_type, tuple):
            # Union type represented as tuple
            members = ', '.join(getattr(t, '__name__', str(t)) for t in target_type)
            return f"Union[{members}]"
        if isinstance(target_type, type):
            return target_type.__name__
        # Fallback for unknown types (for runtime safety)
        return str(target_type)  # type: ignore[unreachable]

    def _coerce_value(self, value: Any, target_type: type) -> Any:
        """Perform the actual coercion.

        Args:
            value: Value to coerce
            target_type: Target Python type, or a tuple of types that is
                treated as a union and tried member by member

        Returns:
            Coerced value

        Raises:
            Exception: If coercion fails
        """
        # Handle union types (like dict|list for JSON)
        if isinstance(target_type, tuple):
            for member in target_type:
                try:
                    return self._coerce_value(value, member)
                except Exception:
                    # Any failure of one member (including OverflowError or
                    # OSError) only means "try the next one"; the summary
                    # error below is raised when all members fail.
                    continue
            raise ValueError(f"Could not coerce to any of {target_type}")

        if target_type is str:
            return str(value)
        if target_type is int:
            return self._to_int(value)
        if target_type is float:
            return self._to_float(value)
        if target_type is bool:
            return self._to_bool(value)
        if target_type is datetime:
            return self._to_datetime(value)
        if target_type is dict:
            return self._to_dict(value)
        if target_type is list:
            return self._to_list(value)
        if target_type is bytes:
            return self._to_bytes(value)

        # Unknown type - attempt direct conversion
        return target_type(value)

    def _to_int(self, value: Any) -> int:
        """Convert to int; floats must be integral, strings may carry a base prefix."""
        if isinstance(value, str):
            # Remove whitespace and handle common formats
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1 if lowered == 'true' else 0
            # Handle hex, octal, binary
            base = self._INT_PREFIX_BASES.get(lowered[:2])
            return int(text, base) if base else int(text)
        if isinstance(value, float):
            # is_integer() is False for NaN and the infinities as well, so
            # every lossy case gets the same descriptive ValueError.
            if not value.is_integer():
                raise ValueError(f"Float {value} cannot be losslessly converted to int")
            return int(value)
        # Covers bool as well: int(True) == 1 and int(False) == 0.
        return int(value)

    def _to_float(self, value: Any) -> float:
        """Convert to float; the strings 'true'/'false' map to 1.0/0.0."""
        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1.0 if lowered == 'true' else 0.0
            return float(text)
        # Covers bool as well: float(True) == 1.0 and float(False) == 0.0.
        return float(value)

    def _to_bool(self, value: Any) -> bool:
        """Convert to bool; strings must be one of the recognised tokens."""
        if isinstance(value, str):
            token = value.strip().lower()
            if token in self._TRUE_STRINGS:
                return True
            if token in self._FALSE_STRINGS:
                return False
            raise ValueError(f"String '{token}' is not a valid boolean")
        # Everything else follows ordinary Python truthiness.
        return bool(value)

    def _to_datetime(self, value: Any) -> datetime:
        """Convert a string or a Unix timestamp to datetime."""
        if isinstance(value, str):
            # Try common datetime formats
            for fmt in self._DATETIME_FORMATS:
                try:
                    return datetime.strptime(value, fmt)
                except ValueError:
                    continue

            # Try parsing as ISO format; "Z" is rewritten to an explicit
            # UTC offset, so this path can return a timezone-aware value.
            try:
                return datetime.fromisoformat(value.replace('Z', '+00:00'))
            except (ValueError, AttributeError):
                pass

            raise ValueError(f"Could not parse datetime from '{value}'")
        if isinstance(value, (int, float)):
            # Assume Unix timestamp; fromtimestamp() without tz gives a
            # naive datetime in the local timezone.
            return datetime.fromtimestamp(value)
        raise ValueError(f"Cannot coerce {type(value).__name__} to datetime")

    def _to_dict(self, value: Any) -> Any:
        """Convert to dict (for JSON type)."""
        if isinstance(value, str):
            # NOTE(review): the decoded JSON is returned as-is, so a string
            # holding a JSON array or scalar yields a non-dict here. Kept
            # because FieldType.JSON maps to dict and callers may rely on
            # it - confirm before tightening.
            return json.loads(value)
        if hasattr(value, '__dict__'):
            # Copy so that mutating the result cannot alter the source object.
            return dict(vars(value))
        if isinstance(value, (list, tuple)):
            # Try to convert list of pairs to dict
            if all(isinstance(item, (list, tuple)) and len(item) == 2 for item in value):
                return dict(value)
            raise ValueError("Cannot convert list to dict")
        return dict(value)

    def _to_list(self, value: Any) -> list:
        """Convert to list (for JSON type)."""
        if isinstance(value, str):
            # Try parsing as JSON
            try:
                parsed = json.loads(value)
            except (json.JSONDecodeError, TypeError):
                # Split comma-separated values
                if ',' in value:
                    return [part.strip() for part in value.split(',')]
                return [value]
            # A JSON scalar or object is wrapped in a single-element list.
            return parsed if isinstance(parsed, list) else [parsed]
        if isinstance(value, dict):
            # Convert dict to list of key-value pairs
            return list(value.items())
        if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)):
            return list(value)
        return [value]

    def _to_bytes(self, value: Any) -> bytes:
        """Convert to bytes."""
        if isinstance(value, str):
            return value.encode('utf-8')
        if isinstance(value, (list, tuple)):
            # Assume list of integers
            return bytes(value)
        if isinstance(value, int):
            # bytes(n) would build n zero bytes; wrap to get one byte of value n.
            return bytes([value])
        return bytes(value)

    def coerce_many(
        self,
        values: dict[str, Any],
        types: dict[str, type | FieldType]
    ) -> dict[str, ValidationResult]:
        """Coerce multiple values.

        Args:
            values: Dictionary of field names to values
            types: Dictionary of field names to target types

        Returns:
            Dictionary of field names to ValidationResults; fields with no
            entry in ``types`` are passed through as successes
        """
        return {
            field_name: (
                self.coerce(value, types[field_name])
                if field_name in types
                # No type specified, pass through
                else ValidationResult.success(value)
            )
            for field_name, value in values.items()
        }