Edit on GitHub

parsetypes

This package provides tools for parsing serialised data to recover their original underlying types.

The TypeParser class provides configurable type inference and parsing. This can be initialised with different settings to, for example:

  • treat inf as either a float or a normal string
  • give exact Decimal values instead of floats
  • detect inline lists
 1"""
 2	This package provides tools for parsing serialised data to recover their original underlying types.
 3
 4	The `TypeParser` class provides configurable type inference and parsing. This can be initialised with different settings to, for example:
 5	- treat `inf` as either a float or a normal string
 6	- give exact Decimal values instead of floats
 7	- detect inline lists
 8"""
 9
10
11__version__ = "0.3"
12
13from ._common import AnyScalar, AnyScalarType, AnyValue, AnyValueType, GenericValue, Nullable
14from ._parser import TypeParser
15from ._reduce_types import reduce_types
16
17__all__ = ('TypeParser', 'reduce_types', 'Nullable')
class TypeParser:
  60class TypeParser:
  61	"""
  62		A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.
  63
  64		The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. See the constructor documentation for the list of available options.
  65	"""
  66
  67	def __init__(self,
  68		*,
  69		trim: bool=True,
  70		use_decimal: bool=False,
  71		list_delimiter: Optional[str]=None,
  72		none_values: Iterable[str]=[""],
  73		none_case_sensitive: bool=False,
  74		true_values: Iterable[str]=["true"],
  75		false_values: Iterable[str]=["false"],
  76		bool_case_sensitive: bool=False,
  77		int_case_sensitive: bool=False,
  78		inf_values: Iterable[str]=[],
  79		nan_values: Iterable[str]=[],
  80		float_case_sensitive: bool=False,
  81		case_sensitive: Optional[bool]=None,
  82	):
  83		"""
  84			Initialise a new parser
  85
  86			The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. For example,
  87
  88			```python
  89			parser = TypeParser(list_delimiter=",")
  90			assert parser.list_delimiter == ","
  91			parser.list_delimiter = ";"
  92			assert parser.list_delimiter == ";"
  93			```
  94
  95			Keyword arguments
  96			-----------------
  97			`trim`
  98			: whether leading and trailing whitespace should be stripped from strings
  99
 100			`use_decimal`
 101			: whether non-integer numeric values should be inferred to be Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer the type (`infer()` `infer_series()`, `infer_table()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).
 102
 103			`list_delimiter`
 104			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead. Note that this setting is unaffected by <code><var>parser</var>.trim</code> and <code><var>parser</var>.case_sensitive</code>, and will always be used verbatim.
 105
 106			`none_values`
 107			: list of strings that represent the value `None`
 108
 109			`none_case_sensitive`
 110			: whether matches against `none_values` should be made in a case-sensitive manner
 111
 112			`true_values`
 113			: list of strings that represent the bool value `True`
 114
 115			`false_values`
 116			: list of strings that represent the bool value `False`
 117
 118			`bool_case_sensitive`
 119			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner
 120
 121			`int_case_sensitive`
 122			: whether checks for int should be done in a case-sensitive manner. This only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.
 123
 124			`inf_values`
 125			: list of strings that represent the float or Decimal value of infinity. Each of the strings can also be prepended with a negative sign to represent negative infinity.
 126
 127			`nan_values`
 128			: list of strings that represent a float or Decimal that is NaN (not a number)
 129
 130			`float_case_sensitive`
 131			: whether checks for float or Decimal should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.
 132
 133			`case_sensitive`
 134			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, discarding any individual settings.
 135
 136			Raises
 137			------
 138			`ValueError` if any of the options would lead to ambiguities during parsing
 139		"""
 140
 141		self._trim: bool = False
 142		self._use_decimal: bool = False
 143		self._list_delimiter: Union[str, None] = None
 144		self._match_none_values: set[str] = set()
 145		self._original_none_values: set[str] = set()
 146		self._none_case_sensitive: bool = False
 147		self._match_true_values: set[str] = set()
 148		self._original_true_values: set[str] = set()
 149		self._match_false_values: set[str] = set()
 150		self._original_false_values: set[str] = set()
 151		self._bool_case_sensitive: bool = False
 152		self._int_case_sensitive: bool = False
 153		self._match_inf_values: set[str] = set()
 154		self._original_inf_values: set[str] = set()
 155		self._match_nan_values: set[str] = set()
 156		self._original_nan_values: set[str] = set()
 157		self._float_case_sensitive: bool = False
 158
 159		# Unconfigurable default values
 160		self._negative_char = "-"
 161		self._negative_chars = {self._negative_char, "−"}
 162		self._sign_chars = self._negative_chars | {"+"}
 163		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
 164		self._digit_separators = {"_"}
 165		self._scientific_char = "e"
 166		self._float_separator = "."
 167		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}
 168		# special_chars = self._reserved_chars | self._list_delimiter
 169
 170		# Configured values
 171
 172		self.trim = trim
 173		self.use_decimal = use_decimal
 174		self.list_delimiter = list_delimiter
 175
 176		self.none_case_sensitive = none_case_sensitive
 177		self.bool_case_sensitive = bool_case_sensitive
 178		self.int_case_sensitive = int_case_sensitive
 179		self.float_case_sensitive = float_case_sensitive
 180		self.case_sensitive = case_sensitive
 181
 182		self.none_values = none_values
 183
 184		self.true_values = true_values
 185		self.false_values = false_values
 186
 187		self.inf_values = inf_values
 188		self.nan_values = nan_values
 189
 190		# Check if any special values conflict
 191		for name, special_values in [
 192			(_SpecialValue.LIST, [self._list_delimiter] if self._list_delimiter is not None else []),
 193			(_SpecialValue.NONE, self._match_none_values),
 194			(_SpecialValue.TRUE, self._match_true_values),
 195			(_SpecialValue.FALSE, self._match_false_values),
 196			(_SpecialValue.INF, self._match_inf_values),
 197			(_SpecialValue.NAN, self._match_nan_values),
 198		]:
 199			for special_value in special_values:
 200				self._validate_special(name, special_value)
 201
 202
 203	def _validate_special(self, name: _SpecialValue, value: str):
 204		if value in self._reserved_chars:
 205			raise ValueError(f"cannot use reserved char as {name.value}: {value}")
 206
 207		if name != _SpecialValue.NONE and self.is_none(value):
 208			raise ValueError(f"cannot use None value as {name.value}: {value}")
 209
 210		if (
 211			(name == _SpecialValue.TRUE and self.parse_bool(value) != True) or
 212			(name == _SpecialValue.FALSE and self.parse_bool(value) != False) or
 213			(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(value))
 214		):
 215			raise ValueError(f"cannot use bool value as {name.value}: {value}")
 216
 217		if self.is_int(value):
 218			raise ValueError(f"cannot use int value as {name.value}: {value}")
 219
 220		if self._use_decimal:
 221			if (
 222				(name == _SpecialValue.INF and self.parse_decimal(value) != Decimal(math.inf)) or
 223				(name == _SpecialValue.NAN and not self.parse_decimal(value).is_nan()) or
 224				(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(value))
 225			):
 226				raise ValueError(f"cannot use Decimal value as {name}: {value}")
 227		else:
 228			if (
 229				(name == _SpecialValue.INF and self.parse_float(value) != math.inf) or
 230				(name == _SpecialValue.NAN and self.parse_float(value) is not math.nan) or
 231				(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(value))
 232			):
 233				raise ValueError(f"cannot use float value as {name}: {value}")
 234
 235
 236	@property
 237	def trim(self) -> bool:
 238		return self._trim
 239
 240	@trim.setter
 241	def trim(self, value: bool):
 242		if type(value) != bool:
 243			raise TypeError(f"trim must be a bool: {value}")
 244		if value != self._trim:
 245			self._trim = value
 246			self.none_values = self._original_none_values
 247			self.true_values = self._original_true_values
 248			self.false_values = self._original_false_values
 249			self.inf_values = self._original_inf_values
 250			self.nan_values = self._original_nan_values
 251
 252
 253	@property
 254	def use_decimal(self) -> bool:
 255		return self._use_decimal
 256
 257	@use_decimal.setter
 258	def use_decimal(self, value: bool):
 259		if type(value) != bool:
 260			raise TypeError(f"use_decimal must be a bool: {value}")
 261		self._use_decimal = value
 262
 263
 264	@property
 265	def list_delimiter(self) -> Union[str, None]:
 266		return self._list_delimiter
 267
 268	@list_delimiter.setter
 269	def list_delimiter(self, value: Union[str, None]):
 270		if value is not None and type(value) != str:
 271			raise TypeError(f"list_delimiter must be a str or None: {value}")
 272		if value is not None:
 273			self._validate_special(_SpecialValue.LIST, value)
 274		self._list_delimiter = value
 275
 276
 277	@property
 278	def none_values(self) -> set[str]:
 279		if self._trim:
 280			return {value.strip() for value in self._original_none_values}
 281		else:
 282			return self._original_none_values
 283
 284	@none_values.setter
 285	def none_values(self, values: Iterable[str]):
 286		if not isinstance(values, Iterable):
 287			raise TypeError(f"none_values must be an Iterable: {values}")
 288		for i, value in enumerate(values):
 289			if type(value) != str:
 290				raise TypeError(f"each item in none_values must be a str: {value} at index {i}")
 291		self._original_none_values = set(values)
 292		if self._trim:
 293			values = (value.strip() for value in values)
 294		if not self._none_case_sensitive:
 295			values = (value.lower() for value in values)
 296		self._match_none_values = set(values)
 297
 298
 299	@property
 300	def none_case_sensitive(self) -> bool:
 301		return self._none_case_sensitive
 302
 303	@none_case_sensitive.setter
 304	def none_case_sensitive(self, value: bool):
 305		if type(value) != bool:
 306			raise TypeError(f"none_case_sensitive must be a bool: {value}")
 307		if value != self._none_case_sensitive:
 308			self._none_case_sensitive = value
 309			self.none_values = self._original_none_values
 310
 311
 312	@property
 313	def true_values(self) -> set[str]:
 314		if self._trim:
 315			return {value.strip() for value in self._original_true_values}
 316		else:
 317			return self._original_true_values
 318
 319	@true_values.setter
 320	def true_values(self, values: Iterable[str]):
 321		if not isinstance(values, Iterable):
 322			raise TypeError(f"true_values must be an Iterable: {values}")
 323		for i, value in enumerate(values):
 324			if type(value) != str:
 325				raise TypeError(f"each item in true_values must be a str: {value} at index {i}")
 326		self._original_true_values = set(values)
 327		if self._trim:
 328			values = (value.strip() for value in values)
 329		if not self._bool_case_sensitive:
 330			values = (value.lower() for value in values)
 331		self._match_true_values = set(values)
 332
 333
 334	@property
 335	def false_values(self) -> set[str]:
 336		if self._trim:
 337			return {value.strip() for value in self._original_false_values}
 338		else:
 339			return self._original_false_values
 340
 341	@false_values.setter
 342	def false_values(self, values: Iterable[str]):
 343		if not isinstance(values, Iterable):
 344			raise TypeError(f"false_values must be an Iterable: {values}")
 345		for i, value in enumerate(values):
 346			if type(value) != str:
 347				raise TypeError(f"each item in false_values must be a str: {value} at index {i}")
 348		self._original_false_values = set(values)
 349		if self._trim:
 350			values = (value.strip() for value in values)
 351		if not self._bool_case_sensitive:
 352			values = (value.lower() for value in values)
 353		self._match_false_values = set(values)
 354
 355
 356	@property
 357	def bool_case_sensitive(self) -> bool:
 358		return self._bool_case_sensitive
 359
 360	@bool_case_sensitive.setter
 361	def bool_case_sensitive(self, value: bool):
 362		if type(value) != bool:
 363			raise TypeError(f"bool_case_sensitive must be a bool: {value}")
 364		if value != self._bool_case_sensitive:
 365			self._bool_case_sensitive = value
 366			self.true_values = self._original_true_values
 367			self.false_values = self._original_false_values
 368
 369
 370	@property
 371	def int_case_sensitive(self) -> bool:
 372		return self._int_case_sensitive
 373
 374	@int_case_sensitive.setter
 375	def int_case_sensitive(self, value: bool):
 376		if type(value) != bool:
 377			raise TypeError(f"int_case_sensitive must be a bool: {value}")
 378		self._int_case_sensitive = value
 379
 380
 381	@property
 382	def inf_values(self) -> set[str]:
 383		if self._trim:
 384			return {value.strip() for value in self._original_inf_values}
 385		else:
 386			return self._original_inf_values
 387
 388	@inf_values.setter
 389	def inf_values(self, values: Iterable[str]):
 390		if not isinstance(values, Iterable):
 391			raise TypeError(f"inf_values must be an Iterable: {values}")
 392		for i, value in enumerate(values):
 393			if type(value) != str:
 394				raise TypeError(f"each item in inf_values must be a str: {value} at index {i}")
 395		self._original_inf_values = set(values)
 396		if self._trim:
 397			values = (value.strip() for value in values)
 398		if not self._float_case_sensitive:
 399			values = (value.lower() for value in values)
 400		self._match_inf_values = set(values)
 401
 402
 403	@property
 404	def nan_values(self) -> set[str]:
 405		values = self._original_nan_values
 406		if self._trim:
 407			return {value.strip() for value in self._original_nan_values}
 408		else:
 409			return self._original_nan_values
 410
 411	@nan_values.setter
 412	def nan_values(self, values: Iterable[str]):
 413		if not isinstance(values, Iterable):
 414			raise TypeError(f"nan_values must be an Iterable: {values}")
 415		for i, value in enumerate(values):
 416			if type(value) != str:
 417				raise TypeError(f"each item in nan_values must be a str: {value} at index {i}")
 418		self._original_nan_values = set(values)
 419		if self._trim:
 420			values = (value.strip() for value in values)
 421		if not self._float_case_sensitive:
 422			values = (value.lower() for value in values)
 423		self._match_nan_values = set(values)
 424
 425
 426	@property
 427	def float_case_sensitive(self) -> bool:
 428		return self._float_case_sensitive
 429
 430	@float_case_sensitive.setter
 431	def float_case_sensitive(self, value: bool):
 432		if type(value) != bool:
 433			raise TypeError(f"float_case_sensitive must be a bool: {value}")
 434		if value != self._float_case_sensitive:
 435			self._float_case_sensitive = value
 436			self.inf_values = self._original_inf_values
 437			self.nan_values = self._original_nan_values
 438
 439
 440	@property
 441	def case_sensitive(self) -> Union[bool, None]:
 442		if (
 443			self._none_case_sensitive == self._bool_case_sensitive and
 444			self._none_case_sensitive == self._int_case_sensitive and
 445			self._none_case_sensitive == self._float_case_sensitive
 446		):
 447			return self._none_case_sensitive
 448		else:
 449			return None
 450
 451	@case_sensitive.setter
 452	def case_sensitive(self, value: Union[bool, None]):
 453		if value is not None and type(value) != bool:
 454			raise TypeError(f"case_sensitive must be a bool or None: {value}")
 455		if value is not None:
 456			self.none_case_sensitive = value
 457			self.int_case_sensitive = value
 458			self.bool_case_sensitive = value
 459			self.float_case_sensitive = value
 460
 461
 462	def is_none(self, value: str) -> bool:
 463		"""
 464			Check if a string represents the value None
 465
 466			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
 467
 468			Arguments
 469			---------
 470			`value`
 471			: string to be checked
 472
 473			Returns
 474			-------
 475			whether it is None
 476
 477			Examples
 478			--------
 479			```python
 480			parser = TypeParser()
 481			parser.is_none("")     # True
 482			parser.is_none("abc")  # False
 483			```
 484		"""
 485		if self._trim:
 486			value = value.strip()
 487		if not self._bool_case_sensitive:
 488			value = value.lower()
 489
 490		if value in self._match_none_values:
 491			return True
 492		else:
 493			return False
 494
 495
 496	def is_bool(self, value: str) -> bool:
 497		"""
 498			Check if a string represents a bool
 499
 500			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
 501
 502			Arguments
 503			---------
 504			`value`
 505			: string to be checked
 506
 507			Returns
 508			-------
 509			whether it is a bool
 510
 511			Examples
 512			--------
 513			```python
 514			parser = TypeParser()
 515			parser.is_bool("true")  # True
 516			parser.is_bool("")      # True
 517			parser.is_bool("abc")   # False
 518			```
 519		"""
 520		if self._trim:
 521			value = value.strip()
 522
 523		if not self._bool_case_sensitive:
 524			value = value.lower()
 525		if value in self._match_true_values:
 526			return True
 527		if value in self._match_false_values:
 528			return True
 529
 530		return False
 531
 532
 533	def is_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> bool:
 534		"""
 535			Check if a string represents an int
 536
 537			Arguments
 538			---------
 539			`value`
 540			: string to be checked
 541
 542			Keyword arguments
 543			-----------------
 544
 545			`allow_negative`
 546			: whether to accept negative values
 547
 548			`allow_sign`
 549			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
 550
 551			`allow_scientific`
 552			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
 553
 554			Returns
 555			-------
 556			whether it is an int
 557
 558			Examples
 559			--------
 560			```python
 561			parser = TypeParser()
 562			parser.is_int("0")    # True
 563			parser.is_int("-1")   # True
 564			parser.is_int("abc")  # False
 565			parser.is_int("")     # False
 566			```
 567		"""
 568		if self._trim:
 569			value = value.strip()
 570
 571		if len(value) == 0:
 572			return False
 573
 574		if allow_scientific:
 575			value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
 576			if exp is not None:
 577				return self.is_int(
 578					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
 579				) and self.is_int(
 580					exp, allow_sign=True, allow_negative=False, allow_scientific=False
 581				)
 582
 583		if value[0] in self._sign_chars:
 584			if len(value) == 1:
 585				return False
 586			if not allow_sign:
 587				return False
 588			if not allow_negative and value[0] in self._negative_chars:
 589				return False
 590			value = value[1:]
 591		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
 592			return False
 593
 594		prev_separated = False
 595		for c in value:
 596			if c in self._digit_separators:
 597				if prev_separated:
 598					return False
 599				prev_separated = True
 600			else:
 601				prev_separated = False
 602				if c not in self._digit_chars:
 603					return False
 604		return True
 605
 606
 607	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
 608		"""
 609			Check if a string represents a float (or equivalently, a Decimal)
 610
 611			This function will also return True if the string represents an int.
 612
 613			Alias: `is_decimal()`
 614
 615			Arguments
 616			---------
 617			`value`
 618			: string to be checked
 619
 620			Keyword arguments
 621			-----------------
 622
 623			`allow_scientific`
 624			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
 625
 626			`allow_inf`
 627			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
 628
 629			`allow_nan`
 630			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
 631
 632			Returns
 633			-------
 634			whether it is a float or Decimal
 635
 636			Examples
 637			--------
 638			```python
 639			parser = TypeParser()
 640			parser.is_float("1.")       # True
 641			parser.is_float("12.3e-2")  # True
 642			parser.is_float("abc")      # False
 643			parser.is_float("")         # False
 644			```
 645		"""
 646		if self._trim:
 647			value = value.strip()
 648
 649		if len(value) > 0 and value[0] in self._sign_chars:
 650			value = value[1:]
 651
 652		if self._float_case_sensitive:
 653			special_value = value
 654		else:
 655			special_value = value.lower()
 656		if allow_inf and special_value in self._match_inf_values:
 657			return True
 658		if allow_nan and special_value in self._match_nan_values:
 659			return True
 660
 661		if len(value) == 0:
 662			return False
 663
 664		if allow_scientific:
 665			value, exp = _decompose_string_pair(value, self._scientific_char, self._float_case_sensitive)
 666			if exp is not None:
 667				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
 668
 669		value, frac = _decompose_string_pair(value, self._float_separator, self._float_case_sensitive)
 670		if frac is not None:
 671			if value == "" and frac == "":
 672				return False
 673			return (
 674				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
 675			) and (
 676				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
 677			)
 678
 679		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)
 680
 681
 682	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
 683		"""
 684			Alias of `is_float()`
 685		"""
 686		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)
 687
 688
 689	def parse_none(self, value: str) -> None:
 690		"""
 691			Parse a string and return it as the value None if possible
 692
 693			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
 694
 695			Arguments
 696			---------
 697			`value`
 698			: string to be parsed
 699
 700			Returns
 701			-------
 702			parsed None value
 703
 704			Raises
 705			------
 706			`ValueError` if `value` cannot be parsed
 707
 708			Examples
 709			--------
 710			```python
 711			parser = TypeParser()
 712			parser.parse_none("")     # None
 713			parser.parse_none("abc")  # raises ValueError
 714			```
 715		"""
 716		if self.is_none(value):
 717			return None
 718		else:
 719			raise ValueError(f"not a none value: {value}")
 720
 721
 722	def parse_bool(self, value: str) -> bool:
 723		"""
 724			Parse a string and return it as a bool if possible
 725
 726			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
 727
 728			Arguments
 729			---------
 730			`value`
 731			: string to be parsed
 732
 733			Returns
 734			-------
 735			parsed bool value
 736
 737			Raises
 738			------
 739			`ValueError` if `value` cannot be parsed
 740
 741			Examples
 742			--------
 743			```python
 744			parser = TypeParser()
 745			parser.parse_bool("true")   # True
 746			parser.parse_bool("FALSE")  # False
 747			```
 748		"""
 749		if self._trim:
 750			value = value.strip()
 751
 752		if self._bool_case_sensitive:
 753			special_value = value
 754		else:
 755			special_value = value.lower()
 756
 757		if special_value in self._match_true_values:
 758			return True
 759		if special_value in self._match_false_values:
 760			return False
 761
 762		raise ValueError(f"not a boolean: {value}")
 763
 764
 765	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
 766		"""
 767			Parse a string and return it as an int if possible
 768
 769			If the string represents a bool, it will be converted to `1` for True and `0` for False.
 770
 771			Arguments
 772			---------
 773			`value`
 774			: string to be parsed
 775
 776			Keyword arguments
 777			-----------------
 778
 779			`allow_scientific`
 780			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
 781
 782			Returns
 783			-------
 784			parsed int value
 785
 786			Raises
 787			------
 788			`ValueError` if `value` cannot be parsed
 789
 790			Examples
 791			--------
 792			```python
 793			parser = TypeParser()
 794			parser.parse_int("0")    # 0
 795			parser.parse_int("-1")   # -1
 796			parser.parse_int("2e3")  # 2000
 797			```
 798		"""
 799		if self._trim:
 800			value = value.strip()
 801
 802		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
 803			if allow_scientific:
 804				value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
 805				if exp is not None:
 806					if value[0] in (self._negative_chars - {self._negative_char}):
 807						value = self._negative_char + value[1:]
 808					return int(value) * (10 ** int(exp))
 809
 810			if value[0] in (self._negative_chars - {self._negative_char}):
 811				value = self._negative_char + value[1:]
 812			return int(value)
 813
 814		elif self.is_bool(value):
 815			return int(self.parse_bool(value))
 816		else:
 817			raise ValueError(f"not an integer: {value}")
 818
 819
 820	def _parse_floatlike(self,
 821		value: str,
 822		converter: Callable[[Union[str, bool]], _FloatLike],
 823		inf_value: _FloatLike,
 824		nan_value: _FloatLike,
 825		*,
 826		allow_scientific: bool=True,
 827		allow_inf: bool=True,
 828		allow_nan: bool=True
 829	) -> _FloatLike:
 830		if self._trim:
 831			value = value.strip()
 832		if self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan):
 833			if self._float_case_sensitive:
 834				special_value = value
 835			else:
 836				special_value = value.lower()
 837			if allow_inf and special_value in self._match_inf_values:
 838				return inf_value
 839			if allow_nan and special_value in self._match_nan_values:
 840				return nan_value
 841
 842			if len(value) > 0 and value[0] in self._sign_chars:
 843				positive_part = value[1:]
 844				if self._float_case_sensitive:
 845					special_value = positive_part
 846				else:
 847					special_value = positive_part.lower()
 848				if allow_inf and special_value in self._match_inf_values:
 849					if value[0] in self._negative_chars:
 850						return -1 * inf_value
 851					else:
 852						return inf_value
 853				if allow_nan and special_value in self._match_nan_values:
 854					return nan_value
 855
 856				if value[0] in self._negative_chars:
 857					value = self._negative_char + positive_part
 858			return converter(value)
 859		elif self.is_bool(value):
 860			return converter(self.parse_bool(value))
 861		else:
 862			raise ValueError(f"not a {_FloatLike.__name__}: {value}")
 863
 864
 865	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
 866		"""
 867			Parse a string and return it as a (non-exact) float if possible
 868
 869			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
 870
 871			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
 872
 873			Arguments
 874			---------
 875			`value`
 876			: string to be parsed
 877
 878			Keyword arguments
 879			-----------------
 880
 881			`allow_scientific`
 882			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
 883
 884			`allow_inf`
 885			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
 886
 887			`allow_nan`
 888			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
 889
 890			Returns
 891			-------
 892			parsed float value
 893
 894			Raises
 895			------
 896			`ValueError` if `value` cannot be parsed
 897
 898			Examples
 899			--------
 900			```python
 901			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
 902			parser.parse_float("1.")       # 1.
 903			parser.parse_float("1.23e2")   # 123.
 904			parser.parse_float("1.23e-2")  # 0.0123
 905			parser.parse_float("inf")      # math.inf
 906			```
 907		"""
 908		return self._parse_floatlike(value, float, math.inf, math.nan,
 909			allow_scientific=allow_scientific,
 910			allow_inf=allow_inf,
 911			allow_nan=allow_nan,
 912		)
 913
 914
 915	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
 916		"""
 917			Parse a string and return it as an exact Decimal if possible
 918
 919			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
 920
 921			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
 922
 923			Arguments
 924			---------
 925			`value`
 926			: string to be parsed
 927
 928			Keyword arguments
 929			-----------------
 930
 931			`allow_scientific`
 932			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
 933
 934			`allow_inf`
 935			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
 936
 937			`allow_nan`
 938			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
 939
 940			Returns
 941			-------
 942			parsed Decimal value
 943
 944			Raises
 945			------
 946			`ValueError` if `value` cannot be parsed
 947
 948			Examples
 949			--------
 950			```python
 951			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
 952			parser.parse_decimal("1.")       # Decimal(1)
 953			parser.parse_decimal("1.23e2")   # Decimal(123)
 954			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
 955			parser.parse_decimal("inf")      # Decimal(math.inf)
 956			```
 957		"""
 958		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
 959			allow_scientific=allow_scientific,
 960			allow_inf=allow_inf,
 961			allow_nan=allow_nan,
 962		)
 963
 964
 965	def infer(self, value: str) -> AnyValueType:
 966		"""
 967			Infer the underlying type of a string
 968
 969			Also check for inline lists if <code><var>parser</var>.list_delimiter</code> is not None.
 970
 971			Arguments
 972			---------
 973			`value`
 974			: the string for which the type should be inferred
 975
 976			Returns
 977			-------
 978			inferred type
 979
 980			Examples
 981			--------
 982			```python
 983			parser = TypeParser()
 984			parser.infer("true")  # bool
 985			parser.infer("2.0")   # float
 986			parser.infer("abc")   # str
 987			```
 988		"""
 989		if self.is_none(value):
 990			return NoneType
 991		if self.is_bool(value):
 992			return bool
 993		if self.is_int(value):
 994			return int
 995		if self.is_float(value):
 996			if self._use_decimal:
 997				return Decimal
 998			else:
 999				return float
1000
1001		if self._trim:
1002			value = value.strip()
1003
1004		if self._list_delimiter is not None and self._list_delimiter in value:
1005			subvalues = value.split(self._list_delimiter)
1006			if self._trim:
1007				subvalues = [subvalue.strip() for subvalue in subvalues]
1008			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
1009			r = list[reduced_type]
1010			return r
1011
1012		return GenericValue
1013
1014
1015	def infer_series(self, values: Iterable[str]) -> AnyValueType:
1016		"""
1017			Infer the underlying common type of a series of strings
1018
1019			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1020
1021			Arguments
1022			---------
1023			`values`
1024			: series of strings for which the type should be inferred
1025
1026			Returns
1027			-------
1028			inferred type
1029
1030			Examples
1031			--------
1032			```python
1033			parser = TypeParser()
1034			parser.infer_series(["1", "2", "3.4"])       # float
1035			parser.infer_series(["true", "false", "2"])  # int
1036			parser.infer_series(["1", "2.3", "abc"])     # str
1037			```
1038		"""
1039		return reduce_types(self.infer(value) for value in values)
1040
1041
1042	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
1043		"""
1044			Infer the underlying common type for each column of a table of strings
1045
1046			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1047
1048			Note that the individual inferred types of every value in the table must be able to fit into memory.
1049
1050			Arguments
1051			---------
1052			`rows`
1053			: table of strings for which the types should be inferred, in row-major order
1054
1055			Returns
1056			-------
1057			inferred types
1058
1059			Examples
1060			--------
1061			```python
1062			parser = TypeParser()
1063			parser.infer_table([
1064				["1",   "true",  "1"],
1065				["2",   "false", "2.3"],
1066				["3.4", "2",     "abc"],
1067			])
1068			# [float, int, str]
1069			```
1070		"""
1071		rows_iter = iter(rows)
1072		first_row = next(rows_iter, None)
1073		if first_row is None:
1074			return []
1075
1076		num_cols = len(first_row)
1077		if num_cols == 0:
1078			return []
1079
1080		table = _TypeTable([[self.infer(value)] for value in first_row])
1081		for row in rows_iter:
1082			table.add_row([self.infer(value) for value in row])
1083
1084		return [reduce_types(col) for col in table.cols]
1085
1086
1087	def convert(self, value: str, target_type: AnyValueType) -> AnyValue:
1088		"""
1089			Convert a string to the specified target type if possible
1090
1091			Valid values for `target_type` include any return value from `infer()`, `infer_series()` and `infer_table()`. To infer and convert the string automatically, use `parse()`, `parse_series()` or `parse_table()` instead.
1092
1093			Arguments
1094			---------
1095			`value`
1096			: the string to be converted
1097
1098			`target_type`
1099			: type to which the value should be converted
1100
1101			Returns
1102			-------
1103			converted value
1104
1105			Raises
1106			-------
1107			`ValueError`
1108			: if `value` cannot be converted to `target_type`
1109
1110			`TypeError`
1111			: if `target_type` is not a valid type
1112
1113			Examples
1114			--------
1115			```python
1116			parser = TypeParser()
1117			parser.convert("true", bool)  # True
1118			parser.convert("2", int)      # 2
1119			parser.convert("2", float)    # 2.
1120			```
1121		"""
1122		base, type_args = _decompose_type(target_type)
1123		if base == NoneType:
1124			return self.parse_none(value)
1125		elif base == bool:
1126			return self.parse_bool(value)
1127		elif base == int:
1128			return self.parse_int(value)
1129		elif base == Decimal:
1130			return self.parse_decimal(value)
1131		elif base == float:
1132			return self.parse_float(value)
1133		elif base == str:
1134			return value
1135		elif base == Nullable:
1136			if self.is_none(value):
1137				return None
1138			else:
1139				if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1140					inner_type = type_args[0]
1141					return self.convert(value, inner_type)
1142				else:
1143					return value
1144		elif base == list:
1145			subvalues = value.split(self._list_delimiter)
1146			if self._trim:
1147				subvalues = [subvalue.strip() for subvalue in subvalues]
1148			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1149				subtype = type_args[0]
1150				return [self.convert(subvalue, subtype) for subvalue in subvalues]
1151			else:
1152				return subvalues
1153		else:
1154			raise TypeError(f"cannot convert to type: {target_type}")
1155
1156
1157	def parse(self, value: str) -> AnyValue:
1158		"""
1159			Parse a string and convert it to its underlying type
1160
1161			Arguments
1162			---------
1163			`value`
1164			: the string to be parsed
1165
1166			Returns
1167			-------
1168			converted value
1169
1170			Examples
1171			--------
1172			```python
1173			parser = TypeParser()
1174			parser.parse("true")  # True
1175			parser.parse("2.0")   # 2.
1176			parser.parse("abc")   # "abc"
1177			```
1178		"""
1179		return self.convert(value, self.infer(value))
1180
1181
1182	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
1183		"""
1184			Parse a series of strings and convert them to their underlying common type
1185
1186			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1187
1188			Arguments
1189			---------
1190			`values`
1191			: series of strings to be parsed
1192
1193			Returns
1194			-------
1195			converted values
1196
1197			Examples
1198			--------
1199			```python
1200			parser = TypeParser()
1201			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
1202			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
1203			parser.parse_series(["true", "false", ""])  # [True, False, None]
1204			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
1205			```
1206		"""
1207		inferred = self.infer_series(values)
1208		return [self.convert(value, inferred) for value in values]
1209
1210
1211	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
1212		"""
1213			Parse a table of strings and convert them to the underlying common type of each column
1214
1215			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1216
1217			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
1218
1219			This is a function that computes the entire table and returns it all at once. The generator function `iterate_table()` behaves analogously, except that it computes and yields each row one at a time instead.
1220
1221			Arguments
1222			---------
1223			`rows`
1224			: table of strings to be parsed, in row-major order
1225
1226			`iterator`
1227			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
1228
1229			Returns
1230			-------
1231			converted table of values, in row-major order
1232
1233			Examples
1234			--------
1235			```python
1236			parser = TypeParser()
1237			table = parser.parse_table([
1238				["1", "5",   "true",  "1"],
1239				["2", "6.7", "false", "2.3"],
1240				["3", "8.0", "",      "abc"],
1241			]):
1242			assert table == [
1243				[1, 5.,  True,  "1"],
1244				[2, 6.7, False, "2.3"],
1245				[3, 8.,  None,  "abc"],
1246			]
1247			```
1248		"""
1249		return [converted_row for converted_row in self.iterate_table(rows)]
1250
1251
1252	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
1253		"""
1254			Parse a table of strings for the underlying common type of each column, then convert and yield each row
1255
1256			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1257
1258			This is a generator function that computes and yields each row one at a time. However, note that in order to determine the types to which each column should be converted, the individual inferred types of every value in the table must still be able to fit into memory.
1259
1260			The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists instead.
1261
1262			Arguments
1263			---------
1264			`rows`
1265			: table of strings to be parsed, in row-major order
1266
1267			Yields
1268			------
1269			each row of converted table values
1270
1271			Examples
1272			--------
1273			```python
1274			parser = TypeParser()
1275			table = parser.iterate_table([
1276				["1",   "true",  "1"],
1277				["2",   "false", "2.3"],
1278				["3.4", "2",     "abc"],
1279			]):
1280			assert next(table) == [1.,  1, "1"]
1281			assert next(table) == [2.,  0, "2.3"]
1282			assert next(table) == [3.4, 2, "abc"]
1283			```
1284		"""
1285		inferred_types = self.infer_table(rows)
1286
1287		for row in rows:
1288			yield [self.convert(value, inferred) for value, inferred in zip(row, inferred_types)]

A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.

The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. See the constructor documentation for the list of available options.

TypeParser( *, trim: bool = True, use_decimal: bool = False, list_delimiter: Optional[str] = None, none_values: Iterable[str] = [''], none_case_sensitive: bool = False, true_values: Iterable[str] = ['true'], false_values: Iterable[str] = ['false'], bool_case_sensitive: bool = False, int_case_sensitive: bool = False, inf_values: Iterable[str] = [], nan_values: Iterable[str] = [], float_case_sensitive: bool = False, case_sensitive: Optional[bool] = None)
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Initialise a new parser

			The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. For example,

			```python
			parser = TypeParser(list_delimiter=",")
			assert parser.list_delimiter == ","
			parser.list_delimiter = ";"
			assert parser.list_delimiter == ";"
			```

			Keyword arguments
			-----------------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred to be Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer the type (`infer()`, `infer_series()`, `infer_table()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead. Note that this setting is unaffected by <code><var>parser</var>.trim</code> and <code><var>parser</var>.case_sensitive</code>, and will always be used verbatim.

			`none_values`
			: list of strings that represent the value `None`

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: list of strings that represent the bool value `True`

			`false_values`
			: list of strings that represent the bool value `False`

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.

			`inf_values`
			: list of strings that represent the float or Decimal value of infinity. Each of the strings can also be prepended with a negative sign to represent negative infinity.

			`nan_values`
			: list of strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float or Decimal should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, discarding any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# Backing attributes, given neutral defaults here before the public
		# properties are assigned further down.
		self._trim: bool = False
		self._use_decimal: bool = False
		self._list_delimiter: Union[str, None] = None
		self._match_none_values: set[str] = set()
		self._original_none_values: set[str] = set()
		self._none_case_sensitive: bool = False
		self._match_true_values: set[str] = set()
		self._original_true_values: set[str] = set()
		self._match_false_values: set[str] = set()
		self._original_false_values: set[str] = set()
		self._bool_case_sensitive: bool = False
		self._int_case_sensitive: bool = False
		self._match_inf_values: set[str] = set()
		self._original_inf_values: set[str] = set()
		self._match_nan_values: set[str] = set()
		self._original_nan_values: set[str] = set()
		self._float_case_sensitive: bool = False

		# Unconfigurable default values
		self._negative_char = "-"
		# Both the ASCII hyphen-minus and the Unicode minus sign count as negative signs
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		# Every character that has a meaning inside a numeric literal
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}
		# special_chars = self._reserved_chars | self._list_delimiter

		# Configured values
		# (assigned through the public properties rather than the backing attributes)

		self.trim = trim
		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		# Case sensitivity is configured before the value lists below.
		# NOTE(review): presumably the value setters normalise their strings according to
		# the current case setting, which would make this order significant; confirm against the setters.
		self.none_case_sensitive = none_case_sensitive
		self.bool_case_sensitive = bool_case_sensitive
		self.int_case_sensitive = int_case_sensitive
		self.float_case_sensitive = float_case_sensitive
		# Assigned last so that it can override the four individual settings above.
		# NOTE(review): presumably a no-op when None (the default); confirm against the setter.
		self.case_sensitive = case_sensitive

		self.none_values = none_values

		self.true_values = true_values
		self.false_values = false_values

		self.inf_values = inf_values
		self.nan_values = nan_values

		# Check if any special values conflict
		for name, special_values in [
			(_SpecialValue.LIST, [self._list_delimiter] if self._list_delimiter is not None else []),
			(_SpecialValue.NONE, self._match_none_values),
			(_SpecialValue.TRUE, self._match_true_values),
			(_SpecialValue.FALSE, self._match_false_values),
			(_SpecialValue.INF, self._match_inf_values),
			(_SpecialValue.NAN, self._match_nan_values),
		]:
			for special_value in special_values:
				self._validate_special(name, special_value)

Initialise a new parser

The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. For example,

parser = TypeParser(list_delimiter=",")
assert parser.list_delimiter == ","
parser.list_delimiter = ";"
assert parser.list_delimiter == ";"

Keyword arguments

trim : whether leading and trailing whitespace should be stripped from strings

use_decimal : whether non-integer numeric values should be inferred to be Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer the type (infer(), infer_series(), infer_table()), and does not affect methods where the type is explicitly specified (is_float(), is_decimal(), parse_float(), parse_decimal()).

list_delimiter : the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead. Note that this setting is unaffected by parser.trim and parser.case_sensitive, and will always be used verbatim.

none_values : list of strings that represent the value None

none_case_sensitive : whether matches against none_values should be made in a case-sensitive manner

true_values : list of strings that represent the bool value True

false_values : list of strings that represent the bool value False

bool_case_sensitive : whether matches against true_values and false_values should be made in a case-sensitive manner

int_case_sensitive : whether checks for int should be done in a case-sensitive manner. This only applies to values given in scientific notation, where the mantissa and exponent usually are separated by e.

inf_values : list of strings that represent the float or Decimal value of infinity. Each of the strings can also be prepended with a negative sign to represent negative infinity.

nan_values : list of strings that represent a float or Decimal that is NaN (not a number)

float_case_sensitive : whether checks for float or Decimal should be done in a case-sensitive manner. This applies to matches against inf_values and nan_values, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by e.

case_sensitive : whether all matches should be made in a case-sensitive manner. Sets all of none_case_sensitive, bool_case_sensitive, int_case_sensitive, float_case_sensitive to the same value, discarding any individual settings.

Raises

ValueError if any of the options would lead to ambiguities during parsing

def is_none(self, value: str) -> bool:
462	def is_none(self, value: str) -> bool:
463		"""
464			Check if a string represents the value None
465
466			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
467
468			Arguments
469			---------
470			`value`
471			: string to be checked
472
473			Returns
474			-------
475			whether it is None
476
477			Examples
478			--------
479			```python
480			parser = TypeParser()
481			parser.is_none("")     # True
482			parser.is_none("abc")  # False
483			```
484		"""
485		if self._trim:
486			value = value.strip()
487		if not self._bool_case_sensitive:
488			value = value.lower()
489
490		if value in self._match_none_values:
491			return True
492		else:
493			return False

Check if a string represents the value None

Only strings that match the values in parser.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on parser.none_case_sensitive, which is False by default.

Arguments

value : string to be checked

Returns

whether it is None

Examples

parser = TypeParser()
parser.is_none("")     # True
parser.is_none("abc")  # False
def is_bool(self, value: str) -> bool:
496	def is_bool(self, value: str) -> bool:
497		"""
498			Check if a string represents a bool
499
500			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
501
502			Arguments
503			---------
504			`value`
505			: string to be checked
506
507			Returns
508			-------
509			whether it is a bool
510
511			Examples
512			--------
513			```python
514			parser = TypeParser()
515			parser.is_bool("true")  # True
516			parser.is_bool("")      # True
517			parser.is_bool("abc")   # False
518			```
519		"""
520		if self._trim:
521			value = value.strip()
522
523		if not self._bool_case_sensitive:
524			value = value.lower()
525		if value in self._match_true_values:
526			return True
527		if value in self._match_false_values:
528			return True
529
530		return False

Check if a string represents a bool

Only strings that match the values in parser.true_values and parser.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on parser.bool_case_sensitive, which is False by default.

Arguments

value : string to be checked

Returns

whether it is a bool

Examples

parser = TypeParser()
parser.is_bool("true")  # True
parser.is_bool("")      # False
parser.is_bool("abc")   # False
def is_int( self, value: str, *, allow_negative: bool = True, allow_sign: bool = True, allow_scientific: bool = True) -> bool:
533	def is_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> bool:
534		"""
535			Check if a string represents an int
536
537			Arguments
538			---------
539			`value`
540			: string to be checked
541
542			Keyword arguments
543			-----------------
544
545			`allow_negative`
546			: whether to accept negative values
547
548			`allow_sign`
549			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
550
551			`allow_scientific`
552			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
553
554			Returns
555			-------
556			whether it is an int
557
558			Examples
559			--------
560			```python
561			parser = TypeParser()
562			parser.is_int("0")    # True
563			parser.is_int("-1")   # True
564			parser.is_int("abc")  # False
565			parser.is_int("")     # False
566			```
567		"""
568		if self._trim:
569			value = value.strip()
570
571		if len(value) == 0:
572			return False
573
574		if allow_scientific:
575			value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
576			if exp is not None:
577				return self.is_int(
578					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
579				) and self.is_int(
580					exp, allow_sign=True, allow_negative=False, allow_scientific=False
581				)
582
583		if value[0] in self._sign_chars:
584			if len(value) == 1:
585				return False
586			if not allow_sign:
587				return False
588			if not allow_negative and value[0] in self._negative_chars:
589				return False
590			value = value[1:]
591		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
592			return False
593
594		prev_separated = False
595		for c in value:
596			if c in self._digit_separators:
597				if prev_separated:
598					return False
599				prev_separated = True
600			else:
601				prev_separated = False
602				if c not in self._digit_chars:
603					return False
604		return True

Check if a string represents an int

Arguments

value : string to be checked

Keyword arguments

allow_negative : whether to accept negative values

allow_sign : whether to accept values prepended with a sign character. If False, it implies that allow_negative is False also.

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that M must be an integer and X must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.

Returns

whether it is an int

Examples

parser = TypeParser()
parser.is_int("0")    # True
parser.is_int("-1")   # True
parser.is_int("abc")  # False
parser.is_int("")     # False
def is_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
607	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
608		"""
609			Check if a string represents a float (or equivalently, a Decimal)
610
611			This function will also return True if the string represents an int.
612
613			Alias: `is_decimal()`
614
615			Arguments
616			---------
617			`value`
618			: string to be checked
619
620			Keyword arguments
621			-----------------
622
623			`allow_scientific`
624			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
625
626			`allow_inf`
627			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
628
629			`allow_nan`
630			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
631
632			Returns
633			-------
634			whether it is a float or Decimal
635
636			Examples
637			--------
638			```python
639			parser = TypeParser()
640			parser.is_float("1.")       # True
641			parser.is_float("12.3e-2")  # True
642			parser.is_float("abc")      # False
643			parser.is_float("")         # False
644			```
645		"""
646		if self._trim:
647			value = value.strip()
648
649		if len(value) > 0 and value[0] in self._sign_chars:
650			value = value[1:]
651
652		if self._float_case_sensitive:
653			special_value = value
654		else:
655			special_value = value.lower()
656		if allow_inf and special_value in self._match_inf_values:
657			return True
658		if allow_nan and special_value in self._match_nan_values:
659			return True
660
661		if len(value) == 0:
662			return False
663
664		if allow_scientific:
665			value, exp = _decompose_string_pair(value, self._scientific_char, self._float_case_sensitive)
666			if exp is not None:
667				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
668
669		value, frac = _decompose_string_pair(value, self._float_separator, self._float_case_sensitive)
670		if frac is not None:
671			if value == "" and frac == "":
672				return False
673			return (
674				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
675			) and (
676				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
677			)
678
679		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)

Check if a string represents a float (or equivalently, a Decimal)

This function will also return True if the string represents an int.

Alias: is_decimal()

Arguments

value : string to be checked

Keyword arguments

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in parser.inf_values (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on parser.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in parser.nan_values (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on parser.float_case_sensitive, which is False by default.

Returns

whether it is a float or Decimal

Examples

parser = TypeParser()
parser.is_float("1.")       # True
parser.is_float("12.3e-2")  # True
parser.is_float("abc")      # False
parser.is_float("")         # False
def is_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
682	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
683		"""
684			Alias of `is_float()`
685		"""
686		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)

Alias of is_float()

def parse_none(self, value: str) -> None:
689	def parse_none(self, value: str) -> None:
690		"""
691			Parse a string and return it as the value None if possible
692
693			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
694
695			Arguments
696			---------
697			`value`
698			: string to be parsed
699
700			Returns
701			-------
702			parsed None value
703
704			Raises
705			------
706			`ValueError` if `value` cannot be parsed
707
708			Examples
709			--------
710			```python
711			parser = TypeParser()
712			parser.parse_none("")     # None
713			parser.parse_none("abc")  # raises ValueError
714			```
715		"""
716		if self.is_none(value):
717			return None
718		else:
719			raise ValueError(f"not a none value: {value}")

Parse a string and return it as the value None if possible

Only strings that match the values in parser.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on parser.none_case_sensitive, which is False by default.

Arguments

value : string to be parsed

Returns

parsed None value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_none("")     # None
parser.parse_none("abc")  # raises ValueError
def parse_bool(self, value: str) -> bool:
722	def parse_bool(self, value: str) -> bool:
723		"""
724			Parse a string and return it as a bool if possible
725
726			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
727
728			Arguments
729			---------
730			`value`
731			: string to be parsed
732
733			Returns
734			-------
735			parsed bool value
736
737			Raises
738			------
739			`ValueError` if `value` cannot be parsed
740
741			Examples
742			--------
743			```python
744			parser = TypeParser()
745			parser.parse_bool("true")   # True
746			parser.parse_bool("FALSE")  # False
747			```
748		"""
749		if self._trim:
750			value = value.strip()
751
752		if self._bool_case_sensitive:
753			special_value = value
754		else:
755			special_value = value.lower()
756
757		if special_value in self._match_true_values:
758			return True
759		if special_value in self._match_false_values:
760			return False
761
762		raise ValueError(f"not a boolean: {value}")

Parse a string and return it as a bool if possible

Only strings that match the values in parser.true_values and parser.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on parser.bool_case_sensitive, which is False by default.

Arguments

value : string to be parsed

Returns

parsed bool value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_bool("true")   # True
parser.parse_bool("FALSE")  # False
def parse_int(self, value: str, *, allow_scientific: bool = True) -> int:
765	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
766		"""
767			Parse a string and return it as an int if possible
768
769			If the string represents a bool, it will be converted to `1` for True and `0` for False.
770
771			Arguments
772			---------
773			`value`
774			: string to be parsed
775
776			Keyword arguments
777			-----------------
778
779			`allow_scientific`
780			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
781
782			Returns
783			-------
784			parsed int value
785
786			Raises
787			------
788			`ValueError` if `value` cannot be parsed
789
790			Examples
791			--------
792			```python
793			parser = TypeParser()
794			parser.parse_int("0")    # 0
795			parser.parse_int("-1")   # -1
796			parser.parse_int("2e3")  # 2000
797			```
798		"""
799		if self._trim:
800			value = value.strip()
801
802		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
803			if allow_scientific:
804				value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
805				if exp is not None:
806					if value[0] in (self._negative_chars - {self._negative_char}):
807						value = self._negative_char + value[1:]
808					return int(value) * (10 ** int(exp))
809
810			if value[0] in (self._negative_chars - {self._negative_char}):
811				value = self._negative_char + value[1:]
812			return int(value)
813
814		elif self.is_bool(value):
815			return int(self.parse_bool(value))
816		else:
817			raise ValueError(f"not an integer: {value}")

Parse a string and return it as an int if possible

If the string represents a bool, it will be converted to 1 for True and 0 for False.

Arguments

value : string to be parsed

Keyword arguments

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that M must be an integer and X must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.

Returns

parsed int value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_int("0")    # 0
parser.parse_int("-1")   # -1
parser.parse_int("2e3")  # 2000
def parse_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> float:
865	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
866		"""
867			Parse a string and return it as a (non-exact) float if possible
868
869			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
870
871			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
872
873			Arguments
874			---------
875			`value`
876			: string to be parsed
877
878			Keyword arguments
879			-----------------
880
881			`allow_scientific`
882			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
883
884			`allow_inf`
885			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
886
887			`allow_nan`
888			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
889
890			Returns
891			-------
892			parsed float value
893
894			Raises
895			------
896			`ValueError` if `value` cannot be parsed
897
898			Examples
899			--------
900			```python
901			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
902			parser.parse_float("1.")       # 1.
903			parser.parse_float("1.23e2")   # 123.
904			parser.parse_float("1.23e-2")  # 0.0123
905			parser.parse_float("inf")      # math.inf
906			```
907		"""
908		return self._parse_floatlike(value, float, math.inf, math.nan,
909			allow_scientific=allow_scientific,
910			allow_inf=allow_inf,
911			allow_nan=allow_nan,
912		)

Parse a string and return it as a (non-exact) float if possible

If the string represents a bool, it will be converted to 1. for True and 0. for False. If the string represents an int, it will be converted to a float also.

Behaves analogously to parse_decimal(), except that parse_decimal() returns an exact Decimal instead.

Arguments

value : string to be parsed

Keyword arguments

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in parser.inf_values (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on parser.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in parser.nan_values (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on parser.float_case_sensitive, which is False by default.

Returns

parsed float value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_float("1.")       # 1.
parser.parse_float("1.23e2")   # 123.
parser.parse_float("1.23e-2")  # 0.0123
parser.parse_float("inf")      # math.inf
def parse_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> decimal.Decimal:
915	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
916		"""
917			Parse a string and return it as an exact Decimal if possible
918
919			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
920
921			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
922
923			Arguments
924			---------
925			`value`
926			: string to be parsed
927
928			Keyword arguments
929			-----------------
930
931			`allow_scientific`
932			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
933
934			`allow_inf`
935			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
936
937			`allow_nan`
938			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
939
940			Returns
941			-------
942			parsed Decimal value
943
944			Raises
945			------
946			`ValueError` if `value` cannot be parsed
947
948			Examples
949			--------
950			```python
951			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
952			parser.parse_decimal("1.")       # Decimal(1)
953			parser.parse_decimal("1.23e2")   # Decimal(123)
954			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
955			parser.parse_decimal("inf")      # Decimal(math.inf)
956			```
957		"""
958		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
959			allow_scientific=allow_scientific,
960			allow_inf=allow_inf,
961			allow_nan=allow_nan,
962		)

Parse a string and return it as an exact Decimal if possible

If the string represents a bool, it will be converted to Decimal(1) for True and Decimal(0) for False. If the string represents an int, it will be converted to a Decimal also.

Behaves analogously to parse_float(), except that parse_float() returns a non-exact float instead.

Arguments

value : string to be parsed

Keyword arguments

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in parser.inf_values (empty set by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on parser.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in parser.nan_values (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on parser.float_case_sensitive, which is False by default.

Returns

parsed Decimal value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_decimal("1.")       # Decimal(1)
parser.parse_decimal("1.23e2")   # Decimal(123)
parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
parser.parse_decimal("inf")      # Decimal(math.inf)
def infer( self, value: str) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
	def infer(self, value: str) -> AnyValueType:
		"""
			Infer the underlying type of a string

			Scalar types are tried in the order none, bool, int, then float (reported as Decimal when <code><var>parser</var>.use_decimal</code> is set). If none of them match and <code><var>parser</var>.list_delimiter</code> is not None, the string is also checked for an inline list, whose element type is the reduced type of its items.

			Arguments
			---------
			`value`
			: the string for which the type should be inferred

			Returns
			-------
			inferred type

			Examples
			--------
			```python
			parser = TypeParser()
			parser.infer("true")  # bool
			parser.infer("2.0")   # float
			parser.infer("abc")   # str
			```
		"""
		if self.is_none(value):
			return NoneType
		if self.is_bool(value):
			return bool
		if self.is_int(value):
			return int
		if self.is_float(value):
			return Decimal if self._use_decimal else float

		text = value.strip() if self._trim else value
		delimiter = self._list_delimiter

		# Without a delimiter occurring in the text there is no inline list to detect
		if delimiter is None or delimiter not in text:
			return GenericValue

		items = text.split(delimiter)
		if self._trim:
			items = [item.strip() for item in items]
		# Each item is inferred on its own, then reduced to one common element type
		element_type = reduce_types(self.infer(item) for item in items)
		return list[element_type]

Infer the underlying type of a string

Also check for inline lists if parser.list_delimiter is not None.

Arguments

value : the string for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer("true")  # bool
parser.infer("2.0")   # float
parser.infer("abc")   # str
def infer_series( self, values: Iterable[str]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
1015	def infer_series(self, values: Iterable[str]) -> AnyValueType:
1016		"""
1017			Infer the underlying common type of a series of strings
1018
1019			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1020
1021			Arguments
1022			---------
1023			`values`
1024			: series of strings for which the type should be inferred
1025
1026			Returns
1027			-------
1028			inferred type
1029
1030			Examples
1031			--------
1032			```python
1033			parser = TypeParser()
1034			parser.infer_series(["1", "2", "3.4"])       # float
1035			parser.infer_series(["true", "false", "2"])  # int
1036			parser.infer_series(["1", "2.3", "abc"])     # str
1037			```
1038		"""
1039		return reduce_types(self.infer(value) for value in values)

Infer the underlying common type of a series of strings

If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Arguments

values : series of strings for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer_series(["1", "2", "3.4"])       # float
parser.infer_series(["true", "false", "2"])  # int
parser.infer_series(["1", "2.3", "abc"])     # str
def infer_table( self, rows: Iterable[Sequence[str]]) -> list[typing.Type[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]:
1042	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
1043		"""
1044			Infer the underlying common type for each column of a table of strings
1045
1046			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1047
1048			Note that the individual inferred types of every value in the table must be able to fit into memory.
1049
1050			Arguments
1051			---------
1052			`rows`
1053			: table of strings for which the types should be inferred, in row-major order
1054
1055			Returns
1056			-------
1057			inferred types
1058
1059			Examples
1060			--------
1061			```python
1062			parser = TypeParser()
1063			parser.infer_table([
1064				["1",   "true",  "1"],
1065				["2",   "false", "2.3"],
1066				["3.4", "2",     "abc"],
1067			])
1068			# [float, int, str]
1069			```
1070		"""
1071		rows_iter = iter(rows)
1072		first_row = next(rows_iter, None)
1073		if first_row is None:
1074			return []
1075
1076		num_cols = len(first_row)
1077		if num_cols == 0:
1078			return []
1079
1080		table = _TypeTable([[self.infer(value)] for value in first_row])
1081		for row in rows_iter:
1082			table.add_row([self.infer(value) for value in row])
1083
1084		return [reduce_types(col) for col in table.cols]

Infer the underlying common type for each column of a table of strings

For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the individual inferred types of every value in the table must be able to fit into memory.

Arguments

rows : table of strings for which the types should be inferred, in row-major order

Returns

inferred types

Examples

parser = TypeParser()
parser.infer_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
# [float, int, str]
def convert( self, value: str, target_type: Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]:
1087	def convert(self, value: str, target_type: AnyValueType) -> AnyValue:
1088		"""
1089			Convert a string to the specified target type if possible
1090
1091			Valid values for `target_type` include any return value from `infer()`, `infer_series()` and `infer_table()`. To infer and convert the string automatically, use `parse()`, `parse_series()` or `parse_table()` instead.
1092
1093			Arguments
1094			---------
1095			`value`
1096			: the string to be converted
1097
1098			`target_type`
1099			: type to which the value should be converted
1100
1101			Returns
1102			-------
1103			converted value
1104
1105			Raises
1106			-------
1107			`ValueError`
1108			: if `value` cannot be converted to `target_type`
1109
1110			`TypeError`
1111			: if `target_type` is not a valid type
1112
1113			Examples
1114			--------
1115			```python
1116			parser = TypeParser()
1117			parser.convert("true", bool)  # True
1118			parser.convert("2", int)      # 2
1119			parser.convert("2", float)    # 2.
1120			```
1121		"""
1122		base, type_args = _decompose_type(target_type)
1123		if base == NoneType:
1124			return self.parse_none(value)
1125		elif base == bool:
1126			return self.parse_bool(value)
1127		elif base == int:
1128			return self.parse_int(value)
1129		elif base == Decimal:
1130			return self.parse_decimal(value)
1131		elif base == float:
1132			return self.parse_float(value)
1133		elif base == str:
1134			return value
1135		elif base == Nullable:
1136			if self.is_none(value):
1137				return None
1138			else:
1139				if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1140					inner_type = type_args[0]
1141					return self.convert(value, inner_type)
1142				else:
1143					return value
1144		elif base == list:
1145			subvalues = value.split(self._list_delimiter)
1146			if self._trim:
1147				subvalues = [subvalue.strip() for subvalue in subvalues]
1148			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1149				subtype = type_args[0]
1150				return [self.convert(subvalue, subtype) for subvalue in subvalues]
1151			else:
1152				return subvalues
1153		else:
1154			raise TypeError(f"cannot convert to type: {target_type}")

Convert a string to the specified target type if possible

Valid values for target_type include any return value from infer(), infer_series() and infer_table(). To infer and convert the string automatically, use parse(), parse_series() or parse_table() instead.

Arguments

value : the string to be converted

target_type : type to which the value should be converted

Returns

converted value

Raises

ValueError : if value cannot be converted to target_type

TypeError : if target_type is not a valid type

Examples

parser = TypeParser()
parser.convert("true", bool)  # True
parser.convert("2", int)      # 2
parser.convert("2", float)    # 2.
def parse( self, value: str) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]:
1157	def parse(self, value: str) -> AnyValue:
1158		"""
1159			Parse a string and convert it to its underlying type
1160
1161			Arguments
1162			---------
1163			`value`
1164			: the string to be parsed
1165
1166			Returns
1167			-------
1168			converted value
1169
1170			Examples
1171			--------
1172			```python
1173			parser = TypeParser()
1174			parser.parse("true")  # True
1175			parser.parse("2.0")   # 2.
1176			parser.parse("abc")   # "abc"
1177			```
1178		"""
1179		return self.convert(value, self.infer(value))

Parse a string and convert it to its underlying type

Arguments

value : the string to be parsed

Returns

converted value

Examples

parser = TypeParser()
parser.parse("true")  # True
parser.parse("2.0")   # 2.
parser.parse("abc")   # "abc"
def parse_series( self, values: Iterable[str]) -> list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]:
1182	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
1183		"""
1184			Parse a series of strings and convert them to their underlying common type
1185
1186			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1187
1188			Arguments
1189			---------
1190			`values`
1191			: series of strings to be parsed
1192
1193			Returns
1194			-------
1195			converted values
1196
1197			Examples
1198			--------
1199			```python
1200			parser = TypeParser()
1201			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
1202			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
1203			parser.parse_series(["true", "false", ""])  # [True, False, None]
1204			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
1205			```
1206		"""
1207		inferred = self.infer_series(values)
1208		return [self.convert(value, inferred) for value in values]

Parse a series of strings and convert them to their underlying common type

If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Arguments

values : series of strings to be parsed

Returns

converted values

Examples

parser = TypeParser()
parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
parser.parse_series(["true", "false", ""])  # [True, False, None]
parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
def parse_table( self, rows: Iterable[Sequence[str]]) -> list[list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]:
1211	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
1212		"""
1213			Parse a table of strings and convert them to the underlying common type of each column
1214
1215			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1216
1217			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
1218
1219			This is a function that computes the entire table and returns it all at once. The generator function `iterate_table()` behaves analogously, except that it computes and yields each row one at a time instead.
1220
1221			Arguments
1222			---------
1223			`rows`
1224			: table of strings to be parsed, in row-major order
1225
1226			`iterator`
1227			: not accepted by this method (its signature takes only `rows`). The entire table is always computed and returned as a list of lists; to have the rows computed and yielded one at a time, use the generator function `iterate_table()` instead. In either case, the type inference requires that the inferred types of all individual values be able to fit into memory at once.
1228
1229			Returns
1230			-------
1231			converted table of values, in row-major order
1232
1233			Examples
1234			--------
1235			```python
1236			parser = TypeParser()
1237			table = parser.parse_table([
1238				["1", "5",   "true",  "1"],
1239				["2", "6.7", "false", "2.3"],
1240				["3", "8.0", "",      "abc"],
1241			])
1242			assert table == [
1243				[1, 5.,  True,  "1"],
1244				[2, 6.7, False, "2.3"],
1245				[3, 8.,  None,  "abc"],
1246			]
1247			```
1248		"""
1249		return [converted_row for converted_row in self.iterate_table(rows)]

Parse a table of strings and convert them to the underlying common type of each column

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the type to which the values should be converted is determined by infer_table(), and so the individual inferred types of every value in the table must be able to fit into memory.

This is a function that computes the entire table and returns it all at once. The generator function iterate_table() behaves analogously, except that it computes and yields each row one at a time instead.

Arguments

rows : table of strings to be parsed, in row-major order

iterator : not accepted by this method (its signature takes only rows). The entire table is always computed and returned as a list of lists; to have the rows computed and yielded one at a time, use the generator function iterate_table() instead. In either case, the type inference requires that the inferred types of all individual values be able to fit into memory at once.

Returns

converted table of values, in row-major order

Examples

parser = TypeParser()
table = parser.parse_table([
	["1", "5",   "true",  "1"],
	["2", "6.7", "false", "2.3"],
	["3", "8.0", "",      "abc"],
])
assert table == [
	[1, 5.,  True,  "1"],
	[2, 6.7, False, "2.3"],
	[3, 8.,  None,  "abc"],
]
def iterate_table( self, rows: Iterable[Sequence[str]]) -> Iterator[list[Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]:
1252	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
1253		"""
1254			Parse a table of strings for the underlying common type of each column, then convert and yield each row
1255
1256			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1257
1258			This is a generator function that computes and yields each row one at a time. However, note that in order to determine the types to which each column should be converted, the individual inferred types of every value in the table must still be able to fit into memory.
1259
1260			The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists instead.
1261
1262			Arguments
1263			---------
1264			`rows`
1265			: table of strings to be parsed, in row-major order
1266
1267			Yields
1268			------
1269			each row of converted table values
1270
1271			Examples
1272			--------
1273			```python
1274			parser = TypeParser()
1275			table = parser.iterate_table([
1276				["1",   "true",  "1"],
1277				["2",   "false", "2.3"],
1278				["3.4", "2",     "abc"],
1279			])
1280			assert next(table) == [1.,  1, "1"]
1281			assert next(table) == [2.,  0, "2.3"]
1282			assert next(table) == [3.4, 2, "abc"]
1283			```
1284		"""
1285		inferred_types = self.infer_table(rows)
1286
1287		for row in rows:
1288			yield [self.convert(value, inferred) for value, inferred in zip(row, inferred_types)]

Parse a table of strings for the underlying common type of each column, then convert and yield each row

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

This is a generator function that computes and yields each row one at a time. However, note that in order to determine the types to which each column should be converted, the individual inferred types of every value in the table must still be able to fit into memory.

The function parse_table() behaves analogously, except that it computes the entire table and returns it as a list of lists instead.

Arguments

rows : table of strings to be parsed, in row-major order

Yields

each row of converted table values

Examples

parser = TypeParser()
table = parser.iterate_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
assert next(table) == [1.,  1, "1"]
assert next(table) == [2.,  0, "2.3"]
assert next(table) == [3.4, 2, "abc"]
Inherited Members
builtins.object
__new__
__repr__
__hash__
__str__
__getattribute__
__setattr__
__delattr__
__lt__
__le__
__eq__
__ne__
__gt__
__ge__
__reduce_ex__
__reduce__
__getstate__
__subclasshook__
__init_subclass__
__format__
__sizeof__
__dir__
def reduce_types( types: Iterable[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
156def reduce_types(types: Iterable[AnyValueType]) -> AnyValueType:
157	"""
158		Reduce multiple types into a single common type.
159
160		If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.
161
162		This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).
163
164		Arguments
165		---------
166		`types`
167		: types to be reduced
168
169		Returns
170		-------
171		common reduced type
172
173		Examples
174		--------
175		```python
176		reduce_types([int, float])        # float
177		reduce_types([bool, int])         # int
178		reduce_types([int, float, str])   # str
179		```
180	"""
181	reduced_type: Union[AnyValueType, None] = None
182	for t in types:
183		if reduced_type is None:
184			reduced_type = t
185		elif t != reduced_type:
186			reduced_type = _merge_types(reduced_type, t)
187		if reduced_type == _TerminalValue:
188			return _TerminalValue
189
190	if reduced_type is None:
191		# types is empty
192		return GenericValue
193	else:
194		return reduced_type

Reduce multiple types into a single common type.

If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

Arguments

types : types to be reduced

Returns

common reduced type

Examples

reduce_types([int, float])        # float
reduce_types([bool, int])         # int
reduce_types([int, float, str])   # str
class Nullable(typing.Generic[~S]):
22class Nullable(Generic[S]):
23	"""
24		Dummy container type that represents a scalar (`S`) that could also be None
25
26		The type annotation `Nullable[S]` is treated as equivalent to `Union[S, types.NoneType]`, which will accept either a value of type `S` or the value `None`.
27
28		This class should not be instantiated.
29	"""
30	pass

Dummy container type that represents a scalar (S) that could also be None

The type annotation Nullable[S] is treated as equivalent to Union[S, types.NoneType], which will accept either a value of type S or the value None.

This class should not be instantiated.

Nullable()
Inherited Members
typing.Generic
__class_getitem__
__init_subclass__
builtins.object
__new__
__repr__
__hash__
__str__
__getattribute__
__setattr__
__delattr__
__lt__
__le__
__eq__
__ne__
__gt__
__ge__
__reduce_ex__
__reduce__
__getstate__
__subclasshook__
__format__
__sizeof__
__dir__