Coverage for src/dataknobs_data/validation/type_coercion.py: 0%

172 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 21:42 -0500

1"""Type coercion utilities for schema validation.""" 

2 

3import json 

4import logging 

5from datetime import datetime 

6from typing import Any, Callable, Dict, List, Type, Union 

7 

8from dataknobs_data.fields import FieldType 

9 

10logger = logging.getLogger(__name__) 

11 

12 

class CoercionError(Exception):
    """Signal that a value could not be converted to the requested type."""

16 

17 

class TypeCoercer:
    """Coerce field values to a requested target type.

    A target may be given as a Python type (``int``), a ``FieldType``
    member, or a case-insensitive type name (``"integer"``).  Converters
    are provided for str, int, float, bool, list, dict and datetime; more
    can be added with ``register_coercion``.
    """

    # Tried in order by ``_to_datetime``; the first format that parses wins,
    # so an ambiguous "01/02/2024" is read month-first.
    _DATETIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%d',
        '%m/%d/%Y',
        '%m/%d/%Y %H:%M:%S',
        '%d/%m/%Y',
        '%d/%m/%Y %H:%M:%S',
    )

    def __init__(self):
        """Build the lookup tables that map targets to converter methods."""
        # Python type -> converter.
        self._coercion_map: Dict[Any, Callable[[Any], Any]] = {
            str: self._to_string,
            int: self._to_int,
            float: self._to_float,
            bool: self._to_bool,
            list: self._to_list,
            dict: self._to_dict,
            datetime: self._to_datetime,
        }

        # FieldType member -> converter.
        self._field_type_map: Dict[Any, Callable[[Any], Any]] = {
            FieldType.STRING: self._to_string,
            FieldType.INTEGER: self._to_int,
            FieldType.FLOAT: self._to_float,
            FieldType.BOOLEAN: self._to_bool,
            FieldType.JSON: self._to_dict,  # JSON can represent lists/dicts
            FieldType.DATETIME: self._to_datetime,
        }

        # Lower-case type name -> built-in converter.  Built once here
        # instead of on every coerce() call.
        self._type_name_map: Dict[str, Callable[[Any], Any]] = {
            'str': self._to_string,
            'string': self._to_string,
            'int': self._to_int,
            'integer': self._to_int,
            'float': self._to_float,
            'double': self._to_float,
            'bool': self._to_bool,
            'boolean': self._to_bool,
            'list': self._to_list,
            'array': self._to_list,
            'dict': self._to_dict,
            'object': self._to_dict,
            'datetime': self._to_datetime,
            'date': self._to_datetime,
        }

        # Lower-case type name -> converter added via register_coercion().
        self._custom_coercions: Dict[str, Callable[[Any], Any]] = {}

    def coerce(self, value: Any, target_type: "Union[Type, FieldType, str]") -> Any:
        """Coerce a value to the target type.

        Args:
            value: Value to coerce
            target_type: Target type (Type, FieldType, or string)

        Returns:
            Coerced value.  ``None`` is returned unchanged, and so is any
            value whose target type has no registered converter.

        Raises:
            CoercionError: If the converter for the target type fails
        """
        # None is never coerced.
        if value is None:
            return None

        coercion_func = self._find_coercion(target_type)
        if coercion_func is None:
            # No coercion needed or unknown type
            return value

        try:
            return coercion_func(value)
        except Exception as e:
            # Chain the original error so the root cause stays visible.
            raise CoercionError(
                f"Failed to coerce {value!r} to {target_type}: {e}"
            ) from e

    def _find_coercion(self, target_type: Any):
        """Return the converter registered for ``target_type``, or None."""
        if isinstance(target_type, type):
            return self._coercion_map.get(target_type)
        if isinstance(target_type, FieldType):
            return self._field_type_map.get(target_type)
        if isinstance(target_type, str):
            type_name = target_type.lower()
            # Converters registered by name take precedence over built-ins,
            # mirroring how a registered type replaces its built-in entry.
            custom = self._custom_coercions.get(type_name)
            if custom is not None:
                return custom
            return self._type_name_map.get(type_name)
        return None

    def _to_string(self, value: Any) -> str:
        """Coerce to string.

        bytes are decoded as UTF-8 (undecodable bytes are replaced), dicts
        and lists are JSON-encoded, datetimes use ISO format, and anything
        else goes through ``str()``.
        """
        if isinstance(value, str):
            return value
        if isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        if isinstance(value, (dict, list)):
            return json.dumps(value)
        if isinstance(value, datetime):
            return value.isoformat()
        return str(value)

    def _to_int(self, value: Any) -> int:
        """Coerce to integer.

        Accepts bools, ints, floats (truncated toward zero) and strings.
        Strings may be boolean words ("true"/"yes"/"on", "false"/"no"/"off")
        or numeric text, including float text such as "1.0".

        Raises:
            ValueError: If the value is empty, non-numeric, non-finite, or
                of an unsupported type
        """
        # bool is a subclass of int, so it must be tested first; otherwise
        # True/False would be returned as bools instead of 1/0.
        if isinstance(value, bool):
            return int(value)
        if isinstance(value, int):
            return value

        if isinstance(value, str):
            text = value.strip()
            if text == "":
                raise ValueError("Empty string cannot be converted to int")

            # Handle boolean strings
            lowered = text.lower()
            if lowered in ('true', 'yes', 'on'):
                return 1
            if lowered in ('false', 'no', 'off'):
                return 0

            try:
                return int(text)
            except ValueError:
                # Fall through to the float handling below (handles "1.0");
                # float() raises ValueError for non-numeric text.
                value = float(text)

        if isinstance(value, float):
            try:
                return int(value)
            except OverflowError as e:
                # int(inf) raises OverflowError; report it like other bad input.
                raise ValueError(f"Cannot convert {value!r} to int") from e

        raise ValueError(f"Cannot convert {type(value).__name__} to int")

    def _to_float(self, value: Any) -> float:
        """Coerce to float.

        Accepts ints (including bools), floats and numeric strings.

        Raises:
            ValueError: If the string is empty or non-numeric, or the value
                is of an unsupported type
        """
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str):
            text = value.strip()
            if text == "":
                raise ValueError("Empty string cannot be converted to float")
            # float() already understands "inf", "-inf" and "nan" in any case.
            return float(text)
        raise ValueError(f"Cannot convert {type(value).__name__} to float")

    def _to_bool(self, value: Any) -> bool:
        """Coerce to boolean.

        Strings are matched case-insensitively against fixed true/false
        words (the empty string is false); numbers are true when non-zero;
        any other value uses its own truthiness.

        Raises:
            ValueError: If a string is not a recognised boolean word
        """
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            value = value.strip().lower()
            if value in ('true', 'yes', '1', 'on', 't', 'y'):
                return True
            if value in ('false', 'no', '0', 'off', 'f', 'n', ''):
                return False
            raise ValueError(f"Cannot interpret '{value}' as boolean")
        if isinstance(value, (int, float)):
            return value != 0
        if value is None:
            return False
        return bool(value)

    def _to_list(self, value: Any) -> List[Any]:
        """Coerce to list.

        Lists pass through unchanged; tuples and sets are converted; dicts
        become a list of (key, value) pairs.  Strings are parsed as a JSON
        array when bracketed, otherwise split on commas; an empty string
        gives an empty list.  Any other value is wrapped in a one-item list.
        """
        if isinstance(value, list):
            return value
        if isinstance(value, (tuple, set)):
            return list(value)
        if isinstance(value, dict):
            # Convert dict to list of key-value pairs
            return list(value.items())
        if isinstance(value, str):
            text = value.strip()
            if text.startswith('[') and text.endswith(']'):
                try:
                    return json.loads(text)
                except json.JSONDecodeError:
                    # Not valid JSON; fall back to the plain-text rules below.
                    pass

            # Split comma-separated values
            if ',' in text:
                return [part.strip() for part in text.split(',')]

            # Single value as list
            return [text] if text else []
        # Wrap single value in list
        return [value]

    def _to_dict(self, value: Any) -> Dict[str, Any]:
        """Coerce to dictionary.

        Dicts pass through unchanged.  Strings are parsed as a JSON object
        when braced, then as comma-separated ``key=value`` pairs, and
        otherwise wrapped as ``{'value': text}`` (empty string gives ``{}``).
        A sequence of 2-item pairs becomes a dict; other sequences are keyed
        by their string index.  Objects with a ``__dict__`` yield a copy of
        it; anything else is wrapped as ``{'value': value}``.
        """
        if isinstance(value, dict):
            return value
        if isinstance(value, str):
            text = value.strip()
            if text.startswith('{') and text.endswith('}'):
                try:
                    return json.loads(text)
                except json.JSONDecodeError:
                    # Not valid JSON; fall back to the plain-text rules below.
                    pass

            # Try key=value format; items without '=' are skipped.
            if '=' in text:
                result = {}
                for item in text.split(','):
                    if '=' in item:
                        key, val = item.split('=', 1)
                        result[key.strip()] = val.strip()
                return result

            # Empty dict for empty string
            return {} if not text else {'value': text}
        if isinstance(value, (list, tuple)):
            # Convert list of pairs to dict (decided by the first element).
            if value and isinstance(value[0], (list, tuple)) and len(value[0]) == 2:
                return dict(value)
            # Convert list to dict with numeric keys
            return {str(i): v for i, v in enumerate(value)}
        if hasattr(value, '__dict__'):
            # Return a copy so callers cannot mutate the object's attributes
            # through the result (and so a class's mappingproxy becomes a dict).
            return dict(value.__dict__)
        # Wrap value in dict
        return {'value': value}

    def _to_datetime(self, value: Any) -> datetime:
        """Coerce to datetime.

        Strings are tried against a fixed list of formats, then as ISO 8601,
        then as a numeric Unix timestamp.  Numbers are treated as Unix
        timestamps.

        NOTE(review): the explicit formats ending in a literal "Z" produce
        naive datetimes, while the ISO fallback can produce timezone-aware
        ones; timestamps are converted with ``datetime.fromtimestamp``
        without a tz argument.  Confirm callers can handle this mix.

        Raises:
            ValueError: If the value cannot be interpreted as a datetime
        """
        if isinstance(value, datetime):
            return value

        if isinstance(value, str):
            text = value.strip()
            if not text:
                raise ValueError("Empty string cannot be converted to datetime")

            # Try common datetime formats
            for fmt in self._DATETIME_FORMATS:
                try:
                    return datetime.strptime(text, fmt)
                except ValueError:
                    continue

            # Try ISO format
            try:
                return datetime.fromisoformat(text.replace('Z', '+00:00'))
            except ValueError:
                pass

            # Try timestamp.  fromtimestamp raises OverflowError for values
            # such as "inf", so catch it along with the other failures.
            try:
                return datetime.fromtimestamp(float(text))
            except (ValueError, OSError, OverflowError):
                pass

            raise ValueError(f"Cannot parse '{text}' as datetime")

        if isinstance(value, (int, float)):
            # Assume Unix timestamp
            try:
                return datetime.fromtimestamp(value)
            except (ValueError, OSError, OverflowError) as e:
                raise ValueError(f"Invalid timestamp {value}: {e}") from e

        raise ValueError(f"Cannot convert {type(value).__name__} to datetime")

    def register_coercion(
        self,
        target_type: Union[Type, str],
        coercion_func: Callable[[Any], Any]
    ) -> None:
        """Register a custom coercion function.

        A converter registered for a type replaces any existing converter
        for that type.  A converter registered by name is matched
        case-insensitively and takes precedence over the built-in name.
        Targets that are neither a type nor a string are ignored.

        Args:
            target_type: Target type or type name
            coercion_func: Function to coerce values to target type
        """
        if isinstance(target_type, type):
            self._coercion_map[target_type] = coercion_func
        elif isinstance(target_type, str):
            self._custom_coercions[target_type.lower()] = coercion_func

314 if not hasattr(self, '_custom_coercions'): 

315 self._custom_coercions = {} 

316 self._custom_coercions[target_type.lower()] = coercion_func