Coverage for src/dataknobs_data/validation_v2/coercer.py: 60%
136 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 12:29 -0500
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-15 12:29 -0500
1"""
2Type coercion with predictable, consistent behavior.
3"""
5import json
6from datetime import datetime
7from typing import Any, Type, Union, get_origin, get_args
9from dataknobs_data.fields import FieldType
11from .result import ValidationResult
class Coercer:
    """
    Type coercion with predictable results.

    ``coerce`` and ``coerce_many`` always return a ValidationResult and never
    raise; a failed coercion is reported as a failure result whose message
    names the source type, the target type and the underlying error.
    """

    # strptime patterns tried, in this order, for str -> datetime coercion.
    # Order matters for ambiguous inputs: day-first patterns are tried before
    # month-first ones. The patterns ending in a literal "Z" yield *naive*
    # datetimes (the "Z" is matched as plain text, not as a UTC offset).
    _DATETIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%d-%m-%Y',
        '%m/%d/%Y',
        '%m-%d-%Y',
    )

    # Integer literal prefixes (lower-cased) mapped to their numeric base.
    _INT_PREFIX_BASES = {'0x': 16, '0o': 8, '0b': 2}

    # Accepted (stripped, lower-cased) spellings for string -> bool coercion.
    _TRUE_STRINGS = ('true', '1', 'yes', 'y', 'on')
    _FALSE_STRINGS = ('false', '0', 'no', 'n', 'off')

    def coerce(
        self,
        value: Any,
        target_type: Union[Type, FieldType]
    ) -> ValidationResult:
        """
        Coerce a value to the target type.

        Args:
            value: Value to coerce
            target_type: Target type (Python type or FieldType enum)

        Returns:
            ValidationResult with coerced value or error
        """
        # None is never coerced; report it as a failure.
        if value is None:
            return ValidationResult.failure(
                None,
                [f"Cannot coerce None to {self._type_name(target_type)}"]
            )

        # Convert FieldType to Python type
        if isinstance(target_type, FieldType):
            target_type = self._field_type_to_python(target_type)

        try:
            # If already the correct type, return as-is. This check lives
            # inside the try block because isinstance() raises TypeError when
            # target_type is not a class (or tuple of classes), and this
            # method must not raise.
            # NOTE(review): bool is a subclass of int, so True/False pass
            # through unchanged for an int target - confirm this is intended.
            if isinstance(value, target_type):
                return ValidationResult.success(value)
            coerced = self._coerce_value(value, target_type)
        except Exception as e:
            return ValidationResult.failure(
                value,
                [f"Cannot coerce {type(value).__name__} to {self._type_name(target_type)}: {str(e)}"]
            )
        return ValidationResult.success(coerced)

    def _field_type_to_python(self, field_type: FieldType) -> Type:
        """Convert FieldType enum to Python type (``object`` if unmapped)."""
        type_map = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: float,
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: datetime,
            # JSON maps to a tuple, which _coerce_value treats as a union.
            FieldType.JSON: (dict, list),
            FieldType.BINARY: bytes,
        }
        return type_map.get(field_type, object)

    def _type_name(self, target_type: Union[Type, FieldType]) -> str:
        """Get readable name for type; never raises for odd inputs."""
        if isinstance(target_type, FieldType):
            return target_type.name
        if isinstance(target_type, type):
            return target_type.__name__
        if isinstance(target_type, tuple):
            # Fall back to str() for members without __name__ so that building
            # an error message cannot itself raise.
            names = ', '.join(getattr(t, '__name__', str(t)) for t in target_type)
            return f"Union[{names}]"
        return str(target_type)

    def _coerce_value(self, value: Any, target_type: Type) -> Any:
        """
        Perform the actual coercion.

        Args:
            value: Value to coerce
            target_type: Target Python type, or a tuple of types which is
                treated as a union and tried in order

        Returns:
            Coerced value

        Raises:
            Exception: If coercion fails
        """
        # Handle union types (like dict|list for JSON): first success wins.
        if isinstance(target_type, tuple):
            for candidate in target_type:
                try:
                    return self._coerce_value(value, candidate)
                except Exception:
                    # Only ordinary errors mean "try the next member";
                    # KeyboardInterrupt/SystemExit must propagate.
                    continue
            raise ValueError(f"Could not coerce to any of {target_type}")

        if target_type is str:
            return str(value)
        if target_type is int:
            return self._to_int(value)
        if target_type is float:
            return self._to_float(value)
        if target_type is bool:
            return self._to_bool(value)
        if target_type is datetime:
            return self._to_datetime(value)
        if target_type is dict:
            return self._to_dict(value)
        if target_type is list:
            return self._to_list(value)
        if target_type is bytes:
            return self._to_bytes(value)

        # Unknown type - attempt direct conversion
        return target_type(value)

    def _to_int(self, value: Any) -> int:
        """Coerce to int; rejects floats with a fractional part."""
        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1 if lowered == 'true' else 0
            # Hex/octal/binary literals, optionally signed ("-0x1F").
            # int(text, base) accepts both the sign and the matching prefix.
            base = self._INT_PREFIX_BASES.get(text.lstrip('+-')[:2].lower())
            if base is not None:
                return int(text, base)
            return int(text)
        if isinstance(value, float):
            # Check for data loss (int() itself raises for NaN/inf).
            if value != int(value):
                raise ValueError(f"Float {value} cannot be losslessly converted to int")
            return int(value)
        # Covers bool (True -> 1, False -> 0) and anything int() accepts.
        return int(value)

    def _to_float(self, value: Any) -> float:
        """Coerce to float; the strings 'true'/'false' map to 1.0/0.0."""
        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1.0 if lowered == 'true' else 0.0
            return float(text)
        # Covers bool (True -> 1.0) and anything float() accepts.
        return float(value)

    def _to_bool(self, value: Any) -> bool:
        """Coerce to bool; strings must be a recognised true/false spelling."""
        if isinstance(value, str):
            text = value.strip().lower()
            if text in self._TRUE_STRINGS:
                return True
            if text in self._FALSE_STRINGS:
                return False
            raise ValueError(f"String '{text}' is not a valid boolean")
        # Non-strings use ordinary truthiness.
        return bool(value)

    def _to_datetime(self, value: Any) -> datetime:
        """Coerce a string or numeric Unix timestamp to datetime."""
        if isinstance(value, str):
            # Try common datetime formats
            for fmt in self._DATETIME_FORMATS:
                try:
                    return datetime.strptime(value, fmt)
                except ValueError:
                    continue

            # Try parsing as ISO format ("Z" rewritten as an explicit offset).
            try:
                return datetime.fromisoformat(value.replace('Z', '+00:00'))
            except ValueError:
                pass

            raise ValueError(f"Could not parse datetime from '{value}'")
        if isinstance(value, (int, float)):
            # Assume Unix timestamp (fromtimestamp without tz: local time).
            return datetime.fromtimestamp(value)
        raise ValueError(f"Cannot coerce {type(value).__name__} to datetime")

    def _to_dict(self, value: Any) -> dict:
        """Coerce to dict from a JSON object string, object, pairs or mapping."""
        if isinstance(value, str):
            parsed = json.loads(value)
            # json.loads may yield a list, number, string, ...; only a JSON
            # object is a valid dict, so anything else is a failed coercion.
            if not isinstance(parsed, dict):
                raise ValueError(
                    f"JSON value of type {type(parsed).__name__} is not an object"
                )
            return parsed
        if hasattr(value, '__dict__'):
            # Copy so callers cannot mutate the object's attributes through
            # the returned dict.
            return dict(vars(value))
        if isinstance(value, (list, tuple)):
            # Try to convert list of pairs to dict
            if all(isinstance(item, (list, tuple)) and len(item) == 2 for item in value):
                return dict(value)
            raise ValueError("Cannot convert list to dict")
        return dict(value)

    def _to_list(self, value: Any) -> list:
        """Coerce to list; scalars are wrapped, dicts become item pairs."""
        if isinstance(value, str):
            # Try parsing as JSON
            try:
                result = json.loads(value)
            except ValueError:
                # Not JSON (JSONDecodeError is a ValueError):
                # split comma-separated values, else wrap the string.
                if ',' in value:
                    return [v.strip() for v in value.split(',')]
                return [value]
            if not isinstance(result, list):
                return [result]
            return result
        if isinstance(value, dict):
            # Convert dict to list of key-value pairs
            return list(value.items())
        if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)):
            return list(value)
        return [value]

    def _to_bytes(self, value: Any) -> bytes:
        """Coerce to bytes; strings are UTF-8 encoded."""
        if isinstance(value, str):
            return value.encode('utf-8')
        if isinstance(value, (list, tuple)):
            # Assume list of integers
            return bytes(value)
        if isinstance(value, int):
            # A single int becomes one byte, not a zero-filled buffer of
            # that length (which is what bytes(int) would produce).
            return bytes([value])
        return bytes(value)

    def coerce_many(
        self,
        values: dict[str, Any],
        types: dict[str, Union[Type, FieldType]]
    ) -> dict[str, ValidationResult]:
        """
        Coerce multiple values.

        Args:
            values: Dictionary of field names to values
            types: Dictionary of field names to target types

        Returns:
            Dictionary of field names to ValidationResults; fields with no
            entry in ``types`` are passed through as successes.
        """
        return {
            field_name: (
                self.coerce(value, types[field_name])
                if field_name in types
                # No type specified, pass through
                else ValidationResult.success(value)
            )
            for field_name, value in values.items()
        }