Coverage for src/dataknobs_data/validation_old_backup/type_coercion.py: 0%
172 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 12:32 -0500
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 12:32 -0500
1"""Type coercion utilities for schema validation."""
3import json
4import logging
5from datetime import datetime
6from typing import Any, Callable, Dict, List, Type, Union
8from dataknobs_data.fields import FieldType
10logger = logging.getLogger(__name__)
class CoercionError(Exception):
    """Signal that a value could not be coerced to the requested type."""
18class TypeCoercer:
19 """Handle type coercion for field values."""
21 def __init__(self):
22 """Initialize type coercer."""
23 self._coercion_map = {
24 str: self._to_string,
25 int: self._to_int,
26 float: self._to_float,
27 bool: self._to_bool,
28 list: self._to_list,
29 dict: self._to_dict,
30 datetime: self._to_datetime,
31 }
33 self._field_type_map = {
34 FieldType.STRING: self._to_string,
35 FieldType.INTEGER: self._to_int,
36 FieldType.FLOAT: self._to_float,
37 FieldType.BOOLEAN: self._to_bool,
38 FieldType.JSON: self._to_dict, # JSON can represent lists/dicts
39 FieldType.DATETIME: self._to_datetime,
40 }
42 def coerce(self, value: Any, target_type: Union[Type, FieldType, str]) -> Any:
43 """Coerce a value to the target type.
45 Args:
46 value: Value to coerce
47 target_type: Target type (Type, FieldType, or string)
49 Returns:
50 Coerced value
52 Raises:
53 CoercionError: If coercion fails
54 """
55 # Handle None values
56 if value is None:
57 return None
59 # Determine coercion function
60 if isinstance(target_type, type):
61 coercion_func = self._coercion_map.get(target_type)
62 elif isinstance(target_type, FieldType):
63 coercion_func = self._field_type_map.get(target_type)
64 elif isinstance(target_type, str):
65 # String type name
66 type_name_map = {
67 'str': self._to_string,
68 'string': self._to_string,
69 'int': self._to_int,
70 'integer': self._to_int,
71 'float': self._to_float,
72 'double': self._to_float,
73 'bool': self._to_bool,
74 'boolean': self._to_bool,
75 'list': self._to_list,
76 'array': self._to_list,
77 'dict': self._to_dict,
78 'object': self._to_dict,
79 'datetime': self._to_datetime,
80 'date': self._to_datetime,
81 }
82 coercion_func = type_name_map.get(target_type.lower())
83 else:
84 coercion_func = None
86 if not coercion_func:
87 # No coercion needed or unknown type
88 return value
90 try:
91 return coercion_func(value)
92 except Exception as e:
93 raise CoercionError(f"Failed to coerce {value!r} to {target_type}: {e}")
95 def _to_string(self, value: Any) -> str:
96 """Coerce to string."""
97 if isinstance(value, str):
98 return value
99 elif isinstance(value, bytes):
100 return value.decode('utf-8', errors='replace')
101 elif isinstance(value, (dict, list)):
102 return json.dumps(value)
103 elif isinstance(value, datetime):
104 return value.isoformat()
105 else:
106 return str(value)
108 def _to_int(self, value: Any) -> int:
109 """Coerce to integer."""
110 if isinstance(value, int):
111 return value
112 elif isinstance(value, float):
113 return int(value)
114 elif isinstance(value, str):
115 # Handle various string formats
116 value = value.strip()
117 if value == "":
118 raise ValueError("Empty string cannot be converted to int")
120 # Handle boolean strings
121 if value.lower() in ('true', 'yes', 'on'):
122 return 1
123 elif value.lower() in ('false', 'no', 'off'):
124 return 0
126 # Handle numeric strings
127 try:
128 # Try direct int conversion
129 return int(value)
130 except ValueError:
131 # Try float then int (handles "1.0")
132 return int(float(value))
133 elif isinstance(value, bool):
134 return 1 if value else 0
135 else:
136 raise ValueError(f"Cannot convert {type(value).__name__} to int")
138 def _to_float(self, value: Any) -> float:
139 """Coerce to float."""
140 if isinstance(value, (int, float)):
141 return float(value)
142 elif isinstance(value, str):
143 value = value.strip()
144 if value == "":
145 raise ValueError("Empty string cannot be converted to float")
147 # Handle special values
148 if value.lower() == 'inf':
149 return float('inf')
150 elif value.lower() == '-inf':
151 return float('-inf')
152 elif value.lower() == 'nan':
153 return float('nan')
155 return float(value)
156 elif isinstance(value, bool):
157 return 1.0 if value else 0.0
158 else:
159 raise ValueError(f"Cannot convert {type(value).__name__} to float")
161 def _to_bool(self, value: Any) -> bool:
162 """Coerce to boolean."""
163 if isinstance(value, bool):
164 return value
165 elif isinstance(value, str):
166 value = value.strip().lower()
167 if value in ('true', 'yes', '1', 'on', 't', 'y'):
168 return True
169 elif value in ('false', 'no', '0', 'off', 'f', 'n', ''):
170 return False
171 else:
172 raise ValueError(f"Cannot interpret '{value}' as boolean")
173 elif isinstance(value, (int, float)):
174 return value != 0
175 elif value is None:
176 return False
177 else:
178 return bool(value)
180 def _to_list(self, value: Any) -> List[Any]:
181 """Coerce to list."""
182 if isinstance(value, list):
183 return value
184 elif isinstance(value, tuple):
185 return list(value)
186 elif isinstance(value, set):
187 return list(value)
188 elif isinstance(value, dict):
189 # Convert dict to list of key-value pairs
190 return list(value.items())
191 elif isinstance(value, str):
192 # Try to parse JSON array
193 value = value.strip()
194 if value.startswith('[') and value.endswith(']'):
195 try:
196 return json.loads(value)
197 except json.JSONDecodeError:
198 pass
200 # Split comma-separated values
201 if ',' in value:
202 return [v.strip() for v in value.split(',')]
204 # Single value as list
205 return [value] if value else []
206 else:
207 # Wrap single value in list
208 return [value]
210 def _to_dict(self, value: Any) -> Dict[str, Any]:
211 """Coerce to dictionary."""
212 if isinstance(value, dict):
213 return value
214 elif isinstance(value, str):
215 # Try to parse JSON object
216 value = value.strip()
217 if value.startswith('{') and value.endswith('}'):
218 try:
219 return json.loads(value)
220 except json.JSONDecodeError:
221 pass
223 # Try key=value format
224 if '=' in value:
225 result = {}
226 for item in value.split(','):
227 if '=' in item:
228 key, val = item.split('=', 1)
229 result[key.strip()] = val.strip()
230 return result
232 # Empty dict for empty string
233 return {} if not value else {'value': value}
234 elif isinstance(value, (list, tuple)):
235 # Convert list of pairs to dict
236 if value and isinstance(value[0], (list, tuple)) and len(value[0]) == 2:
237 return dict(value)
238 # Convert list to dict with numeric keys
239 return {str(i): v for i, v in enumerate(value)}
240 elif hasattr(value, '__dict__'):
241 # Convert object to dict
242 return value.__dict__
243 else:
244 # Wrap value in dict
245 return {'value': value}
247 def _to_datetime(self, value: Any) -> datetime:
248 """Coerce to datetime."""
249 if isinstance(value, datetime):
250 return value
251 elif isinstance(value, str):
252 value = value.strip()
253 if not value:
254 raise ValueError("Empty string cannot be converted to datetime")
256 # Try common datetime formats
257 formats = [
258 '%Y-%m-%d %H:%M:%S',
259 '%Y-%m-%dT%H:%M:%S',
260 '%Y-%m-%dT%H:%M:%SZ',
261 '%Y-%m-%dT%H:%M:%S.%f',
262 '%Y-%m-%dT%H:%M:%S.%fZ',
263 '%Y-%m-%d',
264 '%m/%d/%Y',
265 '%m/%d/%Y %H:%M:%S',
266 '%d/%m/%Y',
267 '%d/%m/%Y %H:%M:%S',
268 ]
270 for fmt in formats:
271 try:
272 return datetime.strptime(value, fmt)
273 except ValueError:
274 continue
276 # Try ISO format
277 try:
278 return datetime.fromisoformat(value.replace('Z', '+00:00'))
279 except ValueError:
280 pass
282 # Try timestamp
283 try:
284 timestamp = float(value)
285 return datetime.fromtimestamp(timestamp)
286 except (ValueError, OSError):
287 pass
289 raise ValueError(f"Cannot parse '{value}' as datetime")
290 elif isinstance(value, (int, float)):
291 # Assume Unix timestamp
292 try:
293 return datetime.fromtimestamp(value)
294 except (ValueError, OSError) as e:
295 raise ValueError(f"Invalid timestamp {value}: {e}")
296 else:
297 raise ValueError(f"Cannot convert {type(value).__name__} to datetime")
299 def register_coercion(
300 self,
301 target_type: Union[Type, str],
302 coercion_func: Callable[[Any], Any]
303 ) -> None:
304 """Register a custom coercion function.
306 Args:
307 target_type: Target type or type name
308 coercion_func: Function to coerce values to target type
309 """
310 if isinstance(target_type, type):
311 self._coercion_map[target_type] = coercion_func
312 elif isinstance(target_type, str):
313 # Store in a separate map for string type names
314 if not hasattr(self, '_custom_coercions'):
315 self._custom_coercions = {}
316 self._custom_coercions[target_type.lower()] = coercion_func