Coverage for src/dataknobs_data/validation_v2/coercer.py: 60%

136 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-15 12:29 -0500

1""" 

2Type coercion with predictable, consistent behavior. 

3""" 

4 

5import json 

6from datetime import datetime 

7from typing import Any, Type, Union, get_origin, get_args 

8 

9from dataknobs_data.fields import FieldType 

10 

11from .result import ValidationResult 

12 

13 

class Coercer:
    """
    Type coercion with predictable results.

    Always returns ValidationResult, never raises exceptions.
    Provides clear error messages when coercion fails.
    """

    # strptime patterns tried in order; the first one that parses wins, so an
    # ambiguous date such as "01/02/2024" is read day-first (%d/%m/%Y comes
    # before %m/%d/%Y).
    _DATETIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%d-%m-%Y',
        '%m/%d/%Y',
        '%m-%d-%Y',
    )

    # Integer literal prefixes (matched case-insensitively) and their bases.
    _INT_PREFIX_BASES = {'0x': 16, '0o': 8, '0b': 2}

    # Strings (after strip + lower) accepted as booleans.
    _TRUE_STRINGS = ('true', '1', 'yes', 'y', 'on')
    _FALSE_STRINGS = ('false', '0', 'no', 'n', 'off')

    def coerce(
        self,
        value: Any,
        target_type: Union[Type, FieldType]
    ) -> ValidationResult:
        """
        Coerce a value to the target type.

        Args:
            value: Value to coerce
            target_type: Target type (Python type or FieldType enum)

        Returns:
            ValidationResult with coerced value or error
        """
        # None is never coerced; report it as a failure up front.
        if value is None:
            return ValidationResult.failure(
                None,
                [f"Cannot coerce None to {self._type_name(target_type)}"]
            )

        # Convert FieldType to Python type
        if isinstance(target_type, FieldType):
            target_type = self._field_type_to_python(target_type)

        # If already correct type, return as-is. isinstance() raises TypeError
        # when the target is not a class or tuple of classes (for example a
        # parameterized generic); treat that as "not already the right type"
        # so this method keeps its never-raises contract.
        try:
            already_typed = isinstance(value, target_type)
        except TypeError:
            already_typed = False
        if already_typed:
            return ValidationResult.success(value)

        # Attempt coercion; any failure is reported through the result.
        try:
            coerced = self._coerce_value(value, target_type)
            return ValidationResult.success(coerced)
        except Exception as e:
            return ValidationResult.failure(
                value,
                [f"Cannot coerce {type(value).__name__} to {self._type_name(target_type)}: {str(e)}"]
            )

    def _field_type_to_python(self, field_type: FieldType) -> Type:
        """Convert FieldType enum to Python type.

        JSON maps to the tuple ``(dict, list)``; any FieldType not listed in
        the table maps to ``object``.
        """
        type_map = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: float,
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: datetime,
            FieldType.JSON: (dict, list),
            FieldType.BINARY: bytes,
        }
        return type_map.get(field_type, object)

    def _type_name(self, target_type: Union[Type, FieldType]) -> str:
        """Get readable name for type.

        This is called while building error messages, so it must not raise:
        tuple members without a ``__name__`` fall back to ``str()``.
        """
        if isinstance(target_type, FieldType):
            return target_type.name
        if isinstance(target_type, type):
            return target_type.__name__
        if isinstance(target_type, tuple):
            member_names = ', '.join(
                getattr(t, '__name__', str(t)) for t in target_type
            )
            return f"Union[{member_names}]"
        return str(target_type)

    def _coerce_value(self, value: Any, target_type: Type) -> Any:
        """
        Perform the actual coercion.

        Args:
            value: Value to coerce
            target_type: Target Python type, or a tuple of types that are
                tried in order (like (dict, list) for JSON)

        Returns:
            Coerced value

        Raises:
            Exception: If coercion fails
        """
        # Handle union types (like dict|list for JSON): first success wins.
        if isinstance(target_type, tuple):
            for candidate in target_type:
                try:
                    return self._coerce_value(value, candidate)
                except Exception:
                    continue
            raise ValueError(f"Could not coerce to any of {target_type}")

        if target_type == str:
            return str(value)
        if target_type == int:
            return self._to_int(value)
        if target_type == float:
            return self._to_float(value)
        if target_type == bool:
            return self._to_bool(value)
        if target_type == datetime:
            return self._to_datetime(value)
        if target_type == dict:
            return self._to_dict(value)
        if target_type == list:
            return self._to_list(value)
        if target_type == bytes:
            return self._to_bytes(value)

        # Unknown type - attempt direct conversion
        return target_type(value)

    @classmethod
    def _to_int(cls, value: Any) -> int:
        """Coerce to int; strings may be 'true'/'false' or 0x/0o/0b literals."""
        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1 if lowered == 'true' else 0
            # Handle hex, octal, binary; int() accepts the prefix when the
            # matching base is given explicitly.
            base = cls._INT_PREFIX_BASES.get(lowered[:2])
            if base is not None:
                return int(text, base)
            return int(text)
        if isinstance(value, float):
            # Refuse lossy conversions. is_integer() is False for inf/NaN as
            # well, so those get the same clear error.
            if not value.is_integer():
                raise ValueError(f"Float {value} cannot be losslessly converted to int")
            return int(value)
        if isinstance(value, bool):
            return 1 if value else 0
        return int(value)

    @staticmethod
    def _to_float(value: Any) -> float:
        """Coerce to float; the strings 'true'/'false' map to 1.0/0.0."""
        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1.0 if lowered == 'true' else 0.0
            return float(text)
        if isinstance(value, bool):
            return 1.0 if value else 0.0
        return float(value)

    @classmethod
    def _to_bool(cls, value: Any) -> bool:
        """Coerce to bool; strings must be one of the recognised spellings."""
        if isinstance(value, str):
            value = value.strip().lower()
            if value in cls._TRUE_STRINGS:
                return True
            if value in cls._FALSE_STRINGS:
                return False
            raise ValueError(f"String '{value}' is not a valid boolean")
        # Everything else uses Python truthiness.
        return bool(value)

    @classmethod
    def _to_datetime(cls, value: Any) -> datetime:
        """Coerce a date string or a numeric timestamp to datetime."""
        if isinstance(value, str):
            # Try common datetime formats
            for fmt in cls._DATETIME_FORMATS:
                try:
                    return datetime.strptime(value, fmt)
                except ValueError:
                    continue

            # Try parsing as ISO format (a 'Z' suffix is rewritten as +00:00)
            try:
                return datetime.fromisoformat(value.replace('Z', '+00:00'))
            except ValueError:
                pass

            raise ValueError(f"Could not parse datetime from '{value}'")
        if isinstance(value, (int, float)):
            # Assume Unix timestamp
            return datetime.fromtimestamp(value)
        raise ValueError(f"Cannot coerce {type(value).__name__} to datetime")

    @staticmethod
    def _to_dict(value: Any) -> dict:
        """Coerce a JSON object string, an object with __dict__, or pairs to dict."""
        if isinstance(value, str):
            parsed = json.loads(value)
            # json.loads also accepts arrays and scalars; only a JSON object
            # is a valid dict result.
            if not isinstance(parsed, dict):
                raise ValueError(
                    f"JSON value of type {type(parsed).__name__} is not an object"
                )
            return parsed
        if hasattr(value, '__dict__'):
            return vars(value)
        if isinstance(value, (list, tuple)):
            # Try to convert list of pairs to dict
            if all(isinstance(item, (list, tuple)) and len(item) == 2 for item in value):
                return dict(value)
            raise ValueError("Cannot convert list to dict")
        return dict(value)

    @staticmethod
    def _to_list(value: Any) -> list:
        """Coerce to list; strings are parsed as JSON, else split on commas."""
        if isinstance(value, str):
            try:
                parsed = json.loads(value)
            except (ValueError, RecursionError):
                # Not JSON: split comma-separated values, else wrap as-is.
                if ',' in value:
                    return [v.strip() for v in value.split(',')]
                return [value]
            # A non-list JSON value is wrapped in a single-element list.
            return parsed if isinstance(parsed, list) else [parsed]
        if isinstance(value, dict):
            # Convert dict to list of key-value pairs
            return list(value.items())
        if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)):
            return list(value)
        return [value]

    @staticmethod
    def _to_bytes(value: Any) -> bytes:
        """Coerce to bytes; strings are UTF-8 encoded, an int becomes one byte."""
        if isinstance(value, str):
            return value.encode('utf-8')
        if isinstance(value, (list, tuple)):
            # Assume list of integers
            return bytes(value)
        if isinstance(value, int):
            # bytes(n) would build n zero bytes; wrap to get a single byte.
            return bytes([value])
        return bytes(value)

    def coerce_many(
        self,
        values: dict[str, Any],
        types: dict[str, Union[Type, FieldType]]
    ) -> dict[str, ValidationResult]:
        """
        Coerce multiple values.

        Args:
            values: Dictionary of field names to values
            types: Dictionary of field names to target types

        Returns:
            Dictionary of field names to ValidationResults
        """
        results = {}
        for field_name, value in values.items():
            if field_name in types:
                results[field_name] = self.coerce(value, types[field_name])
            else:
                # No type specified, pass through
                results[field_name] = ValidationResult.success(value)
        return results