Coverage for src/dataknobs_data/validation/coercer.py: 9%
137 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-13 11:23 -0700
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-13 11:23 -0700
1"""Type coercion with predictable, consistent behavior.
2"""
4from __future__ import annotations
6import json
7from datetime import datetime
8from typing import Any
10from dataknobs_data.fields import FieldType
12from .result import ValidationResult
class Coercer:
    """Type coercion with predictable results.

    ``coerce`` and ``coerce_many`` always return ValidationResult and never
    raise; every failure is reported as a failed result carrying a readable
    message. The private ``_coerce_value`` helper is the part that raises.
    """

    # Accepted (stripped, lower-cased) string spellings for booleans.
    _TRUE_STRINGS = ('true', '1', 'yes', 'y', 'on')
    _FALSE_STRINGS = ('false', '0', 'no', 'n', 'off')

    # Literal prefixes recognised on integer strings, mapped to their base.
    _INT_PREFIX_BASES = {'0x': 16, '0o': 8, '0b': 2}

    # Explicit datetime layouts, tried in order; the first match wins.
    # NOTE: the ``...Z`` layouts treat "Z" as a literal character, so such
    # strings yield a *naive* datetime. Day-first layouts are tried before
    # month-first ones, so an ambiguous "01/02/2024" parses as 1 February.
    _DATETIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%d-%m-%Y',
        '%m/%d/%Y',
        '%m-%d-%Y',
    )

    def coerce(
        self,
        value: Any,
        target_type: type | FieldType
    ) -> ValidationResult:
        """Coerce a value to the target type.

        Args:
            value: Value to coerce
            target_type: Target type (Python type or FieldType enum)

        Returns:
            ValidationResult with coerced value or error
        """
        # None never coerces; report it against the caller's original type.
        if value is None:
            return ValidationResult.failure(
                None,
                [f"Cannot coerce None to {self._type_name(target_type)}"]
            )

        # Convert FieldType to Python type
        if isinstance(target_type, FieldType):
            target_type = self._field_type_to_python(target_type)

        try:
            # The isinstance check lives inside the try because a target that
            # is not a real class (e.g. ``list[int]``) makes isinstance raise,
            # and this method must not raise.
            # bool is a subclass of int, so a bool aimed at int is still sent
            # through coercion to come out as a plain 0/1 integer.
            is_bool_for_int = target_type is int and isinstance(value, bool)
            if isinstance(value, target_type) and not is_bool_for_int:
                coerced = value
            else:
                coerced = self._coerce_value(value, target_type)
        except Exception as e:
            return ValidationResult.failure(
                value,
                [f"Cannot coerce {type(value).__name__} to {self._type_name(target_type)}: {e!s}"]
            )
        return ValidationResult.success(coerced)

    def _field_type_to_python(self, field_type: FieldType) -> type:
        """Convert FieldType enum to Python type.

        Args:
            field_type: FieldType member to translate

        Returns:
            The matching Python type, or ``object`` for unmapped members
        """
        type_map: dict[FieldType, type] = {
            FieldType.STRING: str,
            FieldType.INTEGER: int,
            FieldType.FLOAT: float,
            FieldType.BOOLEAN: bool,
            FieldType.DATETIME: datetime,
            FieldType.JSON: dict,  # Using dict as primary type for JSON
            FieldType.BINARY: bytes,
        }
        return type_map.get(field_type, object)

    def _type_name(self, target_type: type | FieldType | tuple[type, ...]) -> str:
        """Get readable name for type.

        Args:
            target_type: A FieldType member, a Python type, or a tuple of
                types standing in for a union

        Returns:
            Human-readable name used in error messages
        """
        if isinstance(target_type, FieldType):
            return target_type.name
        if isinstance(target_type, tuple):
            # Union type represented as tuple
            member_names = ', '.join(
                t.__name__ if hasattr(t, '__name__') else str(t)
                for t in target_type
            )
            return f"Union[{member_names}]"
        if isinstance(target_type, type):
            return target_type.__name__
        # Fallback for unknown types (for runtime safety)
        return str(target_type)  # type: ignore[unreachable]

    def _coerce_value(self, value: Any, target_type: type) -> Any:
        """Perform the actual coercion.

        Dispatches to one private helper per supported target type.

        Args:
            value: Value to coerce
            target_type: Target Python type, or a tuple of types that are
                tried in order

        Returns:
            Coerced value

        Raises:
            Exception: If coercion fails
        """
        # Handle union types (like dict|list for JSON)
        if isinstance(target_type, tuple):
            for candidate in target_type:
                try:
                    return self._coerce_value(value, candidate)
                except (ValueError, TypeError, OverflowError):
                    # This member does not fit; fall through to the next one.
                    continue
            raise ValueError(f"Could not coerce to any of {target_type}")

        if target_type == str:
            return str(value)
        if target_type == int:
            return self._to_int(value)
        if target_type == float:
            return self._to_float(value)
        if target_type == bool:
            return self._to_bool(value)
        if target_type == datetime:
            return self._to_datetime(value)
        if target_type == dict:
            return self._to_dict(value)
        if target_type == list:
            return self._to_list(value)
        if target_type == bytes:
            return self._to_bytes(value)

        # Unknown type - attempt direct conversion
        return target_type(value)

    def _to_int(self, value: Any) -> int:
        """Convert ``value`` to int, refusing lossy float conversions."""
        if isinstance(value, bool):
            return 1 if value else 0

        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1 if lowered == 'true' else 0
            # Handle hex, octal, binary, with an optional leading sign.
            digits = lowered[1:] if lowered[:1] in ('+', '-') else lowered
            base = self._INT_PREFIX_BASES.get(digits[:2])
            if base is not None:
                return int(text, base)
            return int(text)

        if isinstance(value, float):
            try:
                whole = int(value)
            except (ValueError, OverflowError) as exc:
                # NaN raises ValueError and +/-inf raises OverflowError;
                # report both with the same message as any other lossy float.
                raise ValueError(
                    f"Float {value} cannot be losslessly converted to int"
                ) from exc
            if value != whole:
                raise ValueError(f"Float {value} cannot be losslessly converted to int")
            return whole

        return int(value)

    def _to_float(self, value: Any) -> float:
        """Convert ``value`` to float, accepting 'true'/'false' strings."""
        if isinstance(value, bool):
            return 1.0 if value else 0.0

        if isinstance(value, str):
            text = value.strip()
            lowered = text.lower()
            if lowered in ('true', 'false'):
                return 1.0 if lowered == 'true' else 0.0
            return float(text)

        return float(value)

    def _to_bool(self, value: Any) -> bool:
        """Convert ``value`` to bool; strings must be a known spelling."""
        if isinstance(value, str):
            token = value.strip().lower()
            if token in self._TRUE_STRINGS:
                return True
            if token in self._FALSE_STRINGS:
                return False
            raise ValueError(f"String '{token}' is not a valid boolean")

        # Everything else follows normal Python truthiness.
        return bool(value)

    def _to_datetime(self, value: Any) -> datetime:
        """Convert a string or Unix timestamp to datetime."""
        if isinstance(value, str):
            # Strip surrounding whitespace like the numeric branches do.
            text = value.strip()

            for fmt in self._DATETIME_FORMATS:
                try:
                    return datetime.strptime(text, fmt)
                except ValueError:
                    continue

            # Try parsing as ISO format
            try:
                return datetime.fromisoformat(text.replace('Z', '+00:00'))
            except ValueError:
                pass

            raise ValueError(f"Could not parse datetime from '{value}'")

        if isinstance(value, (int, float)):
            # Assume Unix timestamp; fromtimestamp without tz yields a naive
            # datetime in the machine's local time.
            return datetime.fromtimestamp(value)

        raise ValueError(f"Cannot coerce {type(value).__name__} to datetime")

    def _to_dict(self, value: Any) -> Any:
        """Convert ``value`` to a dict (used for the JSON field type).

        NOTE(review): a JSON string is returned as whatever it decodes to,
        which may be a list or scalar rather than a dict. Kept unchanged
        because FieldType.JSON maps to dict - confirm whether intended.
        """
        if isinstance(value, str):
            return json.loads(value)

        # Mapping-like objects are converted by content. This must come
        # before the __dict__ check, otherwise e.g. a UserDict would be
        # turned into its internal attribute dictionary.
        if hasattr(value, 'keys') and hasattr(value, '__getitem__'):
            return dict(value)

        if hasattr(value, '__dict__'):
            # Copy so the result neither aliases the object's live attribute
            # dict nor leaks a read-only mappingproxy for class objects.
            return dict(vars(value))

        if isinstance(value, (list, tuple)):
            # Try to convert list of pairs to dict
            if all(isinstance(item, (list, tuple)) and len(item) == 2 for item in value):
                return dict(value)
            raise ValueError("Cannot convert list to dict")

        return dict(value)

    def _to_list(self, value: Any) -> list:
        """Convert ``value`` to a list (used for the JSON field type)."""
        if isinstance(value, str):
            # Try parsing as JSON
            try:
                parsed = json.loads(value)
            except (json.JSONDecodeError, TypeError):
                # Split comma-separated values
                if ',' in value:
                    return [part.strip() for part in value.split(',')]
                return [value]
            return parsed if isinstance(parsed, list) else [parsed]

        if isinstance(value, dict):
            # Convert dict to list of key-value pairs
            return list(value.items())

        if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)):
            return list(value)

        return [value]

    def _to_bytes(self, value: Any) -> bytes:
        """Convert ``value`` to bytes; strings are encoded as UTF-8."""
        if isinstance(value, str):
            return value.encode('utf-8')
        if isinstance(value, int):
            # A bare int means one byte, not ``bytes(n)`` (n zero bytes).
            return bytes([value])
        # Sequences of integers and other bytes-compatible objects.
        return bytes(value)

    def coerce_many(
        self,
        values: dict[str, Any],
        types: dict[str, type | FieldType]
    ) -> dict[str, ValidationResult]:
        """Coerce multiple values.

        Args:
            values: Dictionary of field names to values
            types: Dictionary of field names to target types

        Returns:
            Dictionary of field names to ValidationResults; fields without
            an entry in ``types`` pass through as successes
        """
        results: dict[str, ValidationResult] = {}
        for field_name, value in values.items():
            if field_name in types:
                results[field_name] = self.coerce(value, types[field_name])
            else:
                # No type specified, pass through
                results[field_name] = ValidationResult.success(value)
        return results