sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } - {TokenType.IDENTIFIER} 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IDENTIFIER, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS = { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS = { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 780 "MODIFIES": lambda self: self._parse_modifies_property(), 781 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 782 "NO": lambda self: self._parse_no_property(), 783 "ON": lambda self: self._parse_on_property(), 784 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 785 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 786 "PARTITION": lambda self: self._parse_partitioned_of(), 787 "PARTITION BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 790 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 791 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 792 "READS": lambda self: self._parse_reads_property(), 793 "REMOTE": lambda self: self._parse_remote_with_connection(), 794 "RETURNS": lambda self: self._parse_returns(), 795 "ROW": lambda self: self._parse_row(), 796 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 797 "SAMPLE": lambda self: self.expression( 798 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 799 ), 800 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 801 "SETTINGS": lambda self: self.expression( 802 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 803 ), 804 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 805 "SORTKEY": lambda self: self._parse_sortkey(), 806 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 807 "STABLE": lambda self: self.expression( 808 exp.StabilityProperty, this=exp.Literal.string("STABLE") 809 ), 810 "STORED": lambda self: self._parse_stored(), 811 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 812 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 813 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 814 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 815 "TO": lambda self: self._parse_to_table(), 816 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 817 "TRANSFORM": lambda self: self.expression( 818 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 819 ), 820 "TTL": lambda self: self._parse_ttl(), 821 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 822 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 823 "VOLATILE": lambda self: self._parse_volatile_property(), 824 "WITH": lambda self: self._parse_with_property(), 825 } 826 827 CONSTRAINT_PARSERS = { 828 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 829 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 830 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 831 "CHARACTER SET": lambda self: self.expression( 832 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 833 ), 834 "CHECK": lambda self: self.expression( 835 exp.CheckColumnConstraint, 836 this=self._parse_wrapped(self._parse_conjunction), 837 enforced=self._match_text_seq("ENFORCED"), 838 ), 839 "COLLATE": lambda self: self.expression( 840 exp.CollateColumnConstraint, this=self._parse_var() 841 ), 842 "COMMENT": lambda self: self.expression( 843 exp.CommentColumnConstraint, this=self._parse_string() 844 ), 845 "COMPRESS": lambda self: self._parse_compress(), 846 "CLUSTERED": lambda self: self.expression( 847 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 848 ), 849 "NONCLUSTERED": lambda self: self.expression( 850 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "DEFAULT": lambda self: self.expression( 853 exp.DefaultColumnConstraint, this=self._parse_bitwise() 854 ), 855 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 856 "EPHEMERAL": lambda self: self.expression( 857 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 858 ), 859 "EXCLUDE": lambda self: self.expression( 860 exp.ExcludeColumnConstraint, this=self._parse_index_params() 861 ), 862 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 863 "FORMAT": lambda self: self.expression( 864 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 865 ), 866 "GENERATED": lambda self: self._parse_generated_as_identity(), 867 "IDENTITY": lambda self: self._parse_auto_increment(), 868 "INLINE": lambda self: self._parse_inline(), 869 "LIKE": lambda self: self._parse_create_like(), 870 "NOT": lambda self: self._parse_not_constraint(), 871 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 872 "ON": lambda self: ( 873 self._match(TokenType.UPDATE) 874 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 875 ) 876 or self.expression(exp.OnProperty, this=self._parse_id_var()), 877 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 878 "PERIOD": lambda self: self._parse_period_for_system_time(), 879 "PRIMARY KEY": lambda self: self._parse_primary_key(), 880 "REFERENCES": lambda self: self._parse_references(match=False), 881 "TITLE": lambda self: self.expression( 882 exp.TitleColumnConstraint, this=self._parse_var_or_string() 883 ), 884 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 885 "UNIQUE": lambda self: self._parse_unique(), 886 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 887 "WITH": lambda self: self.expression( 888 exp.Properties, expressions=self._parse_wrapped_properties() 889 ), 890 } 891 892 ALTER_PARSERS = { 893 "ADD": lambda self: self._parse_alter_table_add(), 894 "ALTER": lambda self: self._parse_alter_table_alter(), 895 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 896 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 897 "DROP": lambda self: self._parse_alter_table_drop(), 898 "RENAME": lambda self: self._parse_alter_table_rename(), 899 } 900 901 SCHEMA_UNNAMED_CONSTRAINTS = { 902 "CHECK", 903 "EXCLUDE", 904 "FOREIGN KEY", 905 "LIKE", 906 "PERIOD", 907 "PRIMARY KEY", 908 "UNIQUE", 909 } 910 911 NO_PAREN_FUNCTION_PARSERS = { 912 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 913 "CASE": lambda self: self._parse_case(), 914 "IF": lambda self: self._parse_if(), 915 "NEXT": lambda self: self._parse_next_value_for(), 916 } 917 918 INVALID_FUNC_NAME_TOKENS = { 919 TokenType.IDENTIFIER, 920 TokenType.STRING, 921 } 922 923 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 924 925 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 926 927 FUNCTION_PARSERS = { 928 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 929 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 930 "DECODE": lambda self: self._parse_decode(), 931 "EXTRACT": lambda self: self._parse_extract(), 932 "JSON_OBJECT": lambda self: self._parse_json_object(), 933 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 934 "JSON_TABLE": lambda self: self._parse_json_table(), 935 "MATCH": lambda self: self._parse_match_against(), 936 "OPENJSON": lambda self: self._parse_open_json(), 937 "POSITION": lambda self: self._parse_position(), 938 "PREDICT": lambda self: self._parse_predict(), 939 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 940 "STRING_AGG": lambda self: self._parse_string_agg(), 941 "SUBSTRING": lambda self: self._parse_substring(), 942 "TRIM": lambda self: self._parse_trim(), 943 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 944 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 945 } 946 947 QUERY_MODIFIER_PARSERS = { 948 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 949 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 950 TokenType.WHERE: lambda self: ("where", self._parse_where()), 951 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 952 TokenType.HAVING: lambda self: ("having", self._parse_having()), 953 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 954 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 955 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 956 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 957 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 958 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 959 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 960 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 961 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 962 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 963 TokenType.CLUSTER_BY: lambda self: ( 964 "cluster", 965 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 966 ), 967 TokenType.DISTRIBUTE_BY: lambda self: ( 968 "distribute", 969 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 970 ), 971 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 972 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 973 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 974 } 975 976 SET_PARSERS = { 977 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 978 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 979 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 980 "TRANSACTION": lambda self: self._parse_set_transaction(), 981 } 982 983 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 984 985 TYPE_LITERAL_PARSERS = { 986 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 987 } 988 989 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 990 991 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 992 993 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 994 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 995 "ISOLATION": ( 996 ("LEVEL", "REPEATABLE", "READ"), 997 ("LEVEL", "READ", "COMMITTED"), 998 ("LEVEL", "READ", "UNCOMITTED"), 999 ("LEVEL", "SERIALIZABLE"), 1000 ), 1001 "READ": ("WRITE", "ONLY"), 1002 } 1003 1004 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1005 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1006 ) 1007 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1008 1009 CREATE_SEQUENCE: OPTIONS_TYPE = { 1010 "SCALE": ("EXTEND", "NOEXTEND"), 1011 "SHARD": ("EXTEND", "NOEXTEND"), 1012 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1013 **dict.fromkeys( 1014 ( 1015 "SESSION", 1016 "GLOBAL", 1017 "KEEP", 1018 "NOKEEP", 1019 "ORDER", 1020 "NOORDER", 1021 "NOCACHE", 1022 "CYCLE", 1023 "NOCYCLE", 1024 "NOMINVALUE", 1025 "NOMAXVALUE", 1026 "NOSCALE", 1027 "NOSHARD", 1028 ), 1029 tuple(), 1030 ), 1031 } 1032 1033 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1034 1035 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1036 1037 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1038 1039 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1040 1041 CLONE_KEYWORDS = {"CLONE", "COPY"} 1042 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1043 1044 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1045 1046 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1047 1048 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1049 1050 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1051 1052 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1053 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1054 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1055 1056 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1057 1058 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1059 1060 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1061 1062 DISTINCT_TOKENS = {TokenType.DISTINCT} 1063 1064 NULL_TOKENS = {TokenType.NULL} 1065 1066 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1067 1068 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1069 1070 STRICT_CAST = True 1071 1072 PREFIXED_PIVOT_COLUMNS = False 1073 IDENTIFY_PIVOT_STRINGS = False 1074 1075 LOG_DEFAULTS_TO_LN = False 1076 1077 # Whether ADD is present for each column added by ALTER TABLE 1078 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1079 1080 # Whether the table sample clause expects CSV syntax 1081 TABLESAMPLE_CSV = False 1082 1083 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1084 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1085 1086 # Whether the TRIM function expects the characters to trim as its first argument 1087 TRIM_PATTERN_FIRST = False 1088 1089 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1090 STRING_ALIASES = False 1091 1092 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1093 MODIFIERS_ATTACHED_TO_UNION = True 1094 UNION_MODIFIERS = {"order", "limit", "offset"} 1095 1096 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1097 NO_PAREN_IF_COMMANDS = True 1098 1099 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1100 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1101 1102 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1103 # If this is True and '(' is not found, the keyword will be treated as an identifier 1104 VALUES_FOLLOWED_BY_PAREN = True 1105 1106 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1107 SUPPORTS_IMPLICIT_UNNEST = False 1108 1109 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1110 INTERVAL_SPANS = True 1111 1112 # Whether a PARTITION clause can follow a table reference 1113 SUPPORTS_PARTITION_SELECTION = False 1114 1115 __slots__ = ( 1116 "error_level", 1117 "error_message_context", 1118 "max_errors", 1119 "dialect", 1120 "sql", 1121 "errors", 1122 "_tokens", 1123 "_index", 1124 "_curr", 1125 "_next", 1126 "_prev", 1127 "_prev_comments", 1128 ) 1129 1130 # Autofilled 1131 SHOW_TRIE: t.Dict = {} 1132 SET_TRIE: t.Dict = {} 1133 1134 def __init__( 1135 self, 1136 error_level: t.Optional[ErrorLevel] = None, 1137 error_message_context: int = 100, 1138 max_errors: int = 3, 1139 dialect: DialectType = None, 1140 ): 1141 from sqlglot.dialects import Dialect 1142 1143 self.error_level = error_level or ErrorLevel.IMMEDIATE 1144 self.error_message_context = error_message_context 1145 self.max_errors = max_errors 1146 self.dialect = Dialect.get_or_raise(dialect) 1147 self.reset() 1148 1149 def reset(self): 1150 self.sql = "" 1151 self.errors = [] 1152 self._tokens = [] 1153 self._index = 0 1154 self._curr = None 1155 self._next = None 1156 self._prev = None 1157 self._prev_comments = None 1158 1159 def parse( 1160 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1161 ) -> t.List[t.Optional[exp.Expression]]: 1162 """ 1163 Parses a list of tokens and returns a list of syntax trees, one tree 1164 per parsed SQL statement. 1165 1166 Args: 1167 raw_tokens: The list of tokens. 1168 sql: The original SQL string, used to produce helpful debug messages. 1169 1170 Returns: 1171 The list of the produced syntax trees. 1172 """ 1173 return self._parse( 1174 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1175 ) 1176 1177 def parse_into( 1178 self, 1179 expression_types: exp.IntoType, 1180 raw_tokens: t.List[Token], 1181 sql: t.Optional[str] = None, 1182 ) -> t.List[t.Optional[exp.Expression]]: 1183 """ 1184 Parses a list of tokens into a given Expression type. If a collection of Expression 1185 types is given instead, this method will try to parse the token list into each one 1186 of them, stopping at the first for which the parsing succeeds. 1187 1188 Args: 1189 expression_types: The expression type(s) to try and parse the token list into. 1190 raw_tokens: The list of tokens. 1191 sql: The original SQL string, used to produce helpful debug messages. 1192 1193 Returns: 1194 The target Expression. 1195 """ 1196 errors = [] 1197 for expression_type in ensure_list(expression_types): 1198 parser = self.EXPRESSION_PARSERS.get(expression_type) 1199 if not parser: 1200 raise TypeError(f"No parser registered for {expression_type}") 1201 1202 try: 1203 return self._parse(parser, raw_tokens, sql) 1204 except ParseError as e: 1205 e.errors[0]["into_expression"] = expression_type 1206 errors.append(e) 1207 1208 raise ParseError( 1209 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1210 errors=merge_errors(errors), 1211 ) from errors[-1] 1212 1213 def _parse( 1214 self, 1215 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1216 raw_tokens: t.List[Token], 1217 sql: t.Optional[str] = None, 1218 ) -> t.List[t.Optional[exp.Expression]]: 1219 self.reset() 1220 self.sql = sql or "" 1221 1222 total = len(raw_tokens) 1223 chunks: t.List[t.List[Token]] = [[]] 1224 1225 for i, token in enumerate(raw_tokens): 1226 if token.token_type == TokenType.SEMICOLON: 1227 if i < total - 1: 1228 chunks.append([]) 1229 else: 1230 chunks[-1].append(token) 1231 1232 expressions = [] 1233 1234 for tokens in chunks: 1235 self._index = -1 1236 self._tokens = tokens 1237 self._advance() 1238 1239 expressions.append(parse_method(self)) 1240 1241 if self._index < len(self._tokens): 1242 self.raise_error("Invalid expression / Unexpected token") 1243 1244 self.check_errors() 1245 1246 return expressions 1247 1248 def check_errors(self) -> None: 1249 """Logs or raises any found errors, depending on the chosen error level setting.""" 1250 if self.error_level == ErrorLevel.WARN: 1251 for error in self.errors: 1252 logger.error(str(error)) 1253 elif self.error_level == ErrorLevel.RAISE and self.errors: 1254 raise ParseError( 1255 concat_messages(self.errors, self.max_errors), 1256 errors=merge_errors(self.errors), 1257 ) 1258 1259 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1260 """ 1261 Appends an error in the list of recorded errors or raises it, depending on the chosen 1262 error level setting. 1263 """ 1264 token = token or self._curr or self._prev or Token.string("") 1265 start = token.start 1266 end = token.end + 1 1267 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1268 highlight = self.sql[start:end] 1269 end_context = self.sql[end : end + self.error_message_context] 1270 1271 error = ParseError.new( 1272 f"{message}. Line {token.line}, Col: {token.col}.\n" 1273 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1274 description=message, 1275 line=token.line, 1276 col=token.col, 1277 start_context=start_context, 1278 highlight=highlight, 1279 end_context=end_context, 1280 ) 1281 1282 if self.error_level == ErrorLevel.IMMEDIATE: 1283 raise error 1284 1285 self.errors.append(error) 1286 1287 def expression( 1288 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1289 ) -> E: 1290 """ 1291 Creates a new, validated Expression. 1292 1293 Args: 1294 exp_class: The expression class to instantiate. 1295 comments: An optional list of comments to attach to the expression. 1296 kwargs: The arguments to set for the expression along with their respective values. 1297 1298 Returns: 1299 The target expression. 1300 """ 1301 instance = exp_class(**kwargs) 1302 instance.add_comments(comments) if comments else self._add_comments(instance) 1303 return self.validate_expression(instance) 1304 1305 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1306 if expression and self._prev_comments: 1307 expression.add_comments(self._prev_comments) 1308 self._prev_comments = None 1309 1310 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1311 """ 1312 Validates an Expression, making sure that all its mandatory arguments are set. 1313 1314 Args: 1315 expression: The expression to validate. 1316 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1317 1318 Returns: 1319 The validated expression. 1320 """ 1321 if self.error_level != ErrorLevel.IGNORE: 1322 for error_message in expression.error_messages(args): 1323 self.raise_error(error_message) 1324 1325 return expression 1326 1327 def _find_sql(self, start: Token, end: Token) -> str: 1328 return self.sql[start.start : end.end + 1] 1329 1330 def _is_connected(self) -> bool: 1331 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1332 1333 def _advance(self, times: int = 1) -> None: 1334 self._index += times 1335 self._curr = seq_get(self._tokens, self._index) 1336 self._next = seq_get(self._tokens, self._index + 1) 1337 1338 if self._index > 0: 1339 self._prev = self._tokens[self._index - 1] 1340 self._prev_comments = self._prev.comments 1341 else: 1342 self._prev = None 1343 self._prev_comments = None 1344 1345 def _retreat(self, index: int) -> None: 1346 if index != self._index: 1347 self._advance(index - self._index) 1348 1349 def _warn_unsupported(self) -> None: 1350 if len(self._tokens) <= 1: 1351 return 1352 1353 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1354 # interested in emitting a warning for the one being currently processed. 1355 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1356 1357 logger.warning( 1358 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1359 ) 1360 1361 def _parse_command(self) -> exp.Command: 1362 self._warn_unsupported() 1363 return self.expression( 1364 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1365 ) 1366 1367 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1368 """ 1369 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1370 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1371 the parser state accordingly 1372 """ 1373 index = self._index 1374 error_level = self.error_level 1375 1376 self.error_level = ErrorLevel.IMMEDIATE 1377 try: 1378 this = parse_method() 1379 except ParseError: 1380 this = None 1381 finally: 1382 if not this or retreat: 1383 self._retreat(index) 1384 self.error_level = error_level 1385 1386 return this 1387 1388 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1389 start = self._prev 1390 exists = self._parse_exists() if allow_exists else None 1391 1392 self._match(TokenType.ON) 1393 1394 materialized = self._match_text_seq("MATERIALIZED") 1395 kind = self._match_set(self.CREATABLES) and self._prev 1396 if not kind: 1397 return self._parse_as_command(start) 1398 1399 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1400 this = self._parse_user_defined_function(kind=kind.token_type) 1401 elif kind.token_type == TokenType.TABLE: 1402 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1403 elif kind.token_type == TokenType.COLUMN: 1404 this = self._parse_column() 1405 else: 1406 this = self._parse_id_var() 1407 1408 self._match(TokenType.IS) 1409 1410 return self.expression( 1411 exp.Comment, 1412 this=this, 1413 kind=kind.text, 1414 expression=self._parse_string(), 1415 exists=exists, 1416 materialized=materialized, 1417 ) 1418 1419 def _parse_to_table( 1420 self, 1421 ) -> exp.ToTableProperty: 1422 table = self._parse_table_parts(schema=True) 1423 return self.expression(exp.ToTableProperty, this=table) 1424 1425 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1426 def _parse_ttl(self) -> exp.Expression: 1427 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1428 this = self._parse_bitwise() 1429 1430 if self._match_text_seq("DELETE"): 1431 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1432 if self._match_text_seq("RECOMPRESS"): 1433 return self.expression( 1434 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1435 ) 1436 if self._match_text_seq("TO", "DISK"): 1437 return self.expression( 1438 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1439 ) 1440 if self._match_text_seq("TO", "VOLUME"): 1441 return self.expression( 1442 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1443 ) 1444 1445 return this 1446 1447 expressions = self._parse_csv(_parse_ttl_action) 1448 where = self._parse_where() 1449 group = self._parse_group() 1450 1451 aggregates = None 1452 if group and self._match(TokenType.SET): 1453 aggregates = self._parse_csv(self._parse_set_item) 1454 1455 return self.expression( 1456 exp.MergeTreeTTL, 1457 expressions=expressions, 1458 where=where, 1459 group=group, 1460 aggregates=aggregates, 1461 ) 1462 1463 def _parse_statement(self) -> t.Optional[exp.Expression]: 1464 if self._curr is None: 1465 return None 1466 1467 if self._match_set(self.STATEMENT_PARSERS): 1468 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1469 1470 if self._match_set(Tokenizer.COMMANDS): 1471 return self._parse_command() 1472 1473 expression = self._parse_expression() 1474 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1475 return self._parse_query_modifiers(expression) 1476 1477 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1478 start = self._prev 1479 temporary = self._match(TokenType.TEMPORARY) 1480 materialized = self._match_text_seq("MATERIALIZED") 1481 1482 kind = self._match_set(self.CREATABLES) and self._prev.text 1483 if not kind: 1484 return self._parse_as_command(start) 1485 1486 if_exists = exists or self._parse_exists() 1487 table = self._parse_table_parts( 1488 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1489 ) 1490 1491 if self._match(TokenType.L_PAREN, advance=False): 1492 expressions = self._parse_wrapped_csv(self._parse_types) 1493 else: 1494 expressions = None 1495 1496 return self.expression( 1497 exp.Drop, 1498 comments=start.comments, 1499 exists=if_exists, 1500 this=table, 1501 expressions=expressions, 1502 kind=kind, 1503 temporary=temporary, 1504 materialized=materialized, 1505 cascade=self._match_text_seq("CASCADE"), 1506 constraints=self._match_text_seq("CONSTRAINTS"), 1507 purge=self._match_text_seq("PURGE"), 1508 ) 1509 1510 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1511 return ( 1512 self._match_text_seq("IF") 1513 and (not not_ or self._match(TokenType.NOT)) 1514 and self._match(TokenType.EXISTS) 1515 ) 1516 1517 def _parse_create(self) -> exp.Create | exp.Command: 1518 # Note: this can't be None because we've matched a statement parser 1519 start = self._prev 1520 comments = self._prev_comments 1521 1522 replace = ( 1523 start.token_type == TokenType.REPLACE 1524 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1525 or self._match_pair(TokenType.OR, TokenType.ALTER) 1526 ) 1527 1528 unique = self._match(TokenType.UNIQUE) 1529 1530 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1531 self._advance() 1532 1533 properties = None 1534 create_token = self._match_set(self.CREATABLES) and self._prev 1535 1536 if not create_token: 1537 # exp.Properties.Location.POST_CREATE 1538 properties = self._parse_properties() 1539 create_token = self._match_set(self.CREATABLES) and self._prev 1540 1541 if not properties or not create_token: 1542 return self._parse_as_command(start) 1543 1544 exists = self._parse_exists(not_=True) 1545 this = None 1546 expression: t.Optional[exp.Expression] = None 1547 indexes = None 1548 no_schema_binding = None 1549 begin = None 1550 end = None 1551 clone = None 1552 1553 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1554 nonlocal properties 1555 if properties and temp_props: 1556 properties.expressions.extend(temp_props.expressions) 1557 elif temp_props: 1558 properties = temp_props 1559 1560 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1561 this = self._parse_user_defined_function(kind=create_token.token_type) 1562 1563 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1564 extend_props(self._parse_properties()) 1565 1566 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1567 1568 if not expression: 1569 if self._match(TokenType.COMMAND): 1570 expression = self._parse_as_command(self._prev) 1571 else: 1572 begin = self._match(TokenType.BEGIN) 1573 return_ = self._match_text_seq("RETURN") 1574 1575 if self._match(TokenType.STRING, advance=False): 1576 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1577 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1578 expression = self._parse_string() 1579 extend_props(self._parse_properties()) 1580 else: 1581 expression = self._parse_statement() 1582 1583 end = self._match_text_seq("END") 1584 1585 if return_: 1586 expression = self.expression(exp.Return, this=expression) 1587 elif create_token.token_type == TokenType.INDEX: 1588 this = self._parse_index(index=self._parse_id_var()) 1589 elif create_token.token_type in self.DB_CREATABLES: 1590 table_parts = self._parse_table_parts( 1591 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1592 ) 1593 1594 # exp.Properties.Location.POST_NAME 1595 self._match(TokenType.COMMA) 1596 extend_props(self._parse_properties(before=True)) 1597 1598 this = self._parse_schema(this=table_parts) 1599 1600 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1601 extend_props(self._parse_properties()) 1602 1603 self._match(TokenType.ALIAS) 1604 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1605 # exp.Properties.Location.POST_ALIAS 1606 extend_props(self._parse_properties()) 1607 1608 if create_token.token_type == TokenType.SEQUENCE: 1609 expression = self._parse_types() 1610 extend_props(self._parse_properties()) 1611 else: 1612 expression = self._parse_ddl_select() 1613 1614 if create_token.token_type == TokenType.TABLE: 1615 # exp.Properties.Location.POST_EXPRESSION 1616 extend_props(self._parse_properties()) 1617 1618 indexes = [] 1619 while True: 1620 index = self._parse_index() 1621 1622 # exp.Properties.Location.POST_INDEX 1623 extend_props(self._parse_properties()) 1624 1625 if not index: 1626 break 1627 else: 1628 self._match(TokenType.COMMA) 1629 indexes.append(index) 1630 elif create_token.token_type == TokenType.VIEW: 1631 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1632 no_schema_binding = True 1633 1634 shallow = self._match_text_seq("SHALLOW") 1635 1636 if self._match_texts(self.CLONE_KEYWORDS): 1637 copy = self._prev.text.lower() == "copy" 1638 clone = self.expression( 1639 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1640 ) 1641 1642 if self._curr: 1643 return self._parse_as_command(start) 1644 1645 return self.expression( 1646 exp.Create, 1647 comments=comments, 1648 this=this, 1649 kind=create_token.text.upper(), 1650 replace=replace, 1651 unique=unique, 1652 expression=expression, 1653 exists=exists, 1654 properties=properties, 1655 indexes=indexes, 1656 no_schema_binding=no_schema_binding, 1657 begin=begin, 1658 end=end, 1659 clone=clone, 1660 ) 1661 1662 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1663 seq = exp.SequenceProperties() 1664 1665 options = [] 1666 index = self._index 1667 1668 while self._curr: 1669 if self._match_text_seq("INCREMENT"): 1670 self._match_text_seq("BY") 1671 self._match_text_seq("=") 1672 seq.set("increment", self._parse_term()) 1673 elif self._match_text_seq("MINVALUE"): 1674 seq.set("minvalue", self._parse_term()) 1675 elif self._match_text_seq("MAXVALUE"): 1676 seq.set("maxvalue", self._parse_term()) 1677 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1678 self._match_text_seq("=") 1679 seq.set("start", self._parse_term()) 1680 elif self._match_text_seq("CACHE"): 1681 # T-SQL allows empty CACHE which is initialized dynamically 1682 seq.set("cache", self._parse_number() or True) 1683 elif self._match_text_seq("OWNED", "BY"): 1684 # "OWNED BY NONE" is the default 1685 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1686 else: 1687 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1688 if opt: 1689 options.append(opt) 1690 else: 1691 break 1692 1693 seq.set("options", options if options else None) 1694 return None if self._index == index else seq 1695 1696 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1697 # only used for teradata currently 1698 self._match(TokenType.COMMA) 1699 1700 kwargs = { 1701 "no": self._match_text_seq("NO"), 1702 "dual": self._match_text_seq("DUAL"), 1703 "before": self._match_text_seq("BEFORE"), 1704 "default": self._match_text_seq("DEFAULT"), 1705 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1706 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1707 "after": self._match_text_seq("AFTER"), 1708 "minimum": self._match_texts(("MIN", "MINIMUM")), 1709 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1710 } 1711 1712 if self._match_texts(self.PROPERTY_PARSERS): 1713 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1714 try: 1715 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1716 except TypeError: 1717 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1718 1719 return None 1720 1721 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1722 return self._parse_wrapped_csv(self._parse_property) 1723 1724 def _parse_property(self) -> t.Optional[exp.Expression]: 1725 if self._match_texts(self.PROPERTY_PARSERS): 1726 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1727 1728 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1729 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1730 1731 if self._match_text_seq("COMPOUND", "SORTKEY"): 1732 return self._parse_sortkey(compound=True) 1733 1734 if self._match_text_seq("SQL", "SECURITY"): 1735 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1736 1737 index = self._index 1738 key = self._parse_column() 1739 1740 if not self._match(TokenType.EQ): 1741 self._retreat(index) 1742 return self._parse_sequence_properties() 1743 1744 return self.expression( 1745 exp.Property, 1746 this=key.to_dot() if isinstance(key, exp.Column) else key, 1747 value=self._parse_bitwise() or self._parse_var(any_token=True), 1748 ) 1749 1750 def _parse_stored(self) -> exp.FileFormatProperty: 1751 self._match(TokenType.ALIAS) 1752 1753 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1754 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1755 1756 return self.expression( 1757 exp.FileFormatProperty, 1758 this=( 1759 self.expression( 1760 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1761 ) 1762 if input_format or output_format 1763 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1764 ), 1765 ) 1766 1767 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1768 self._match(TokenType.EQ) 1769 self._match(TokenType.ALIAS) 1770 field = self._parse_field() 1771 if isinstance(field, exp.Identifier) and not field.quoted: 1772 field = exp.var(field) 1773 1774 return self.expression(exp_class, this=field, **kwargs) 1775 1776 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1777 properties = [] 1778 while True: 1779 if before: 1780 prop = self._parse_property_before() 1781 else: 1782 prop = self._parse_property() 1783 if not prop: 1784 break 1785 for p in ensure_list(prop): 1786 properties.append(p) 1787 1788 if properties: 1789 return self.expression(exp.Properties, expressions=properties) 1790 1791 return None 1792 1793 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1794 return self.expression( 1795 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1796 ) 1797 1798 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1799 if self._index >= 2: 1800 pre_volatile_token = self._tokens[self._index - 2] 1801 else: 1802 pre_volatile_token = None 1803 1804 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1805 return exp.VolatileProperty() 1806 1807 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1808 1809 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1810 self._match_pair(TokenType.EQ, TokenType.ON) 1811 1812 prop = self.expression(exp.WithSystemVersioningProperty) 1813 if self._match(TokenType.L_PAREN): 1814 self._match_text_seq("HISTORY_TABLE", "=") 1815 prop.set("this", self._parse_table_parts()) 1816 1817 if self._match(TokenType.COMMA): 1818 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1819 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1820 1821 self._match_r_paren() 1822 1823 return prop 1824 1825 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1826 if self._match(TokenType.L_PAREN, advance=False): 1827 return self._parse_wrapped_properties() 1828 1829 if self._match_text_seq("JOURNAL"): 1830 return self._parse_withjournaltable() 1831 1832 if self._match_texts(self.VIEW_ATTRIBUTES): 1833 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1834 1835 if self._match_text_seq("DATA"): 1836 return self._parse_withdata(no=False) 1837 elif self._match_text_seq("NO", "DATA"): 1838 return self._parse_withdata(no=True) 1839 1840 if not self._next: 1841 return None 1842 1843 return self._parse_withisolatedloading() 1844 1845 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1846 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1847 self._match(TokenType.EQ) 1848 1849 user = self._parse_id_var() 1850 self._match(TokenType.PARAMETER) 1851 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1852 1853 if not user or not host: 1854 return None 1855 1856 return exp.DefinerProperty(this=f"{user}@{host}") 1857 1858 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1859 self._match(TokenType.TABLE) 1860 self._match(TokenType.EQ) 1861 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1862 1863 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1864 return self.expression(exp.LogProperty, no=no) 1865 1866 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1867 return self.expression(exp.JournalProperty, **kwargs) 1868 1869 def _parse_checksum(self) -> exp.ChecksumProperty: 1870 self._match(TokenType.EQ) 1871 1872 on = None 1873 if self._match(TokenType.ON): 1874 on = True 1875 elif self._match_text_seq("OFF"): 1876 on = False 1877 1878 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1879 1880 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1881 return self.expression( 1882 exp.Cluster, 1883 expressions=( 1884 self._parse_wrapped_csv(self._parse_ordered) 1885 if wrapped 1886 else self._parse_csv(self._parse_ordered) 1887 ), 1888 ) 1889 1890 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1891 self._match_text_seq("BY") 1892 1893 self._match_l_paren() 1894 expressions = self._parse_csv(self._parse_column) 1895 self._match_r_paren() 1896 1897 if self._match_text_seq("SORTED", "BY"): 1898 self._match_l_paren() 1899 sorted_by = self._parse_csv(self._parse_ordered) 1900 self._match_r_paren() 1901 else: 1902 sorted_by = None 1903 1904 self._match(TokenType.INTO) 1905 buckets = self._parse_number() 1906 self._match_text_seq("BUCKETS") 1907 1908 return self.expression( 1909 exp.ClusteredByProperty, 1910 expressions=expressions, 1911 sorted_by=sorted_by, 1912 buckets=buckets, 1913 ) 1914 1915 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1916 if not self._match_text_seq("GRANTS"): 1917 self._retreat(self._index - 1) 1918 return None 1919 1920 return self.expression(exp.CopyGrantsProperty) 1921 1922 def _parse_freespace(self) -> exp.FreespaceProperty: 1923 self._match(TokenType.EQ) 1924 return self.expression( 1925 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1926 ) 1927 1928 def _parse_mergeblockratio( 1929 self, no: bool = False, default: bool = False 1930 ) -> exp.MergeBlockRatioProperty: 1931 if self._match(TokenType.EQ): 1932 return self.expression( 1933 exp.MergeBlockRatioProperty, 1934 this=self._parse_number(), 1935 percent=self._match(TokenType.PERCENT), 1936 ) 1937 1938 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1939 1940 def _parse_datablocksize( 1941 self, 1942 default: t.Optional[bool] = None, 1943 minimum: t.Optional[bool] = None, 1944 maximum: t.Optional[bool] = None, 1945 ) -> exp.DataBlocksizeProperty: 1946 self._match(TokenType.EQ) 1947 size = self._parse_number() 1948 1949 units = None 1950 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1951 units = self._prev.text 1952 1953 return self.expression( 1954 exp.DataBlocksizeProperty, 1955 size=size, 1956 units=units, 1957 default=default, 1958 minimum=minimum, 1959 maximum=maximum, 1960 ) 1961 1962 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1963 self._match(TokenType.EQ) 1964 always = self._match_text_seq("ALWAYS") 1965 manual = self._match_text_seq("MANUAL") 1966 never = self._match_text_seq("NEVER") 1967 default = self._match_text_seq("DEFAULT") 1968 1969 autotemp = None 1970 if self._match_text_seq("AUTOTEMP"): 1971 autotemp = self._parse_schema() 1972 1973 return self.expression( 1974 exp.BlockCompressionProperty, 1975 always=always, 1976 manual=manual, 1977 never=never, 1978 default=default, 1979 autotemp=autotemp, 1980 ) 1981 1982 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1983 index = self._index 1984 no = self._match_text_seq("NO") 1985 concurrent = self._match_text_seq("CONCURRENT") 1986 1987 if not self._match_text_seq("ISOLATED", "LOADING"): 1988 self._retreat(index) 1989 return None 1990 1991 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1992 return self.expression( 1993 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1994 ) 1995 1996 def _parse_locking(self) -> exp.LockingProperty: 1997 if self._match(TokenType.TABLE): 1998 kind = "TABLE" 1999 elif self._match(TokenType.VIEW): 2000 kind = "VIEW" 2001 elif self._match(TokenType.ROW): 2002 kind = "ROW" 2003 elif self._match_text_seq("DATABASE"): 2004 kind = "DATABASE" 2005 else: 2006 kind = None 2007 2008 if kind in ("DATABASE", "TABLE", "VIEW"): 2009 this = self._parse_table_parts() 2010 else: 2011 this = None 2012 2013 if self._match(TokenType.FOR): 2014 for_or_in = "FOR" 2015 elif self._match(TokenType.IN): 2016 for_or_in = "IN" 2017 else: 2018 for_or_in = None 2019 2020 if self._match_text_seq("ACCESS"): 2021 lock_type = "ACCESS" 2022 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2023 lock_type = "EXCLUSIVE" 2024 elif self._match_text_seq("SHARE"): 2025 lock_type = "SHARE" 2026 elif self._match_text_seq("READ"): 2027 lock_type = "READ" 2028 elif self._match_text_seq("WRITE"): 2029 lock_type = "WRITE" 2030 elif self._match_text_seq("CHECKSUM"): 2031 lock_type = "CHECKSUM" 2032 else: 2033 lock_type = None 2034 2035 override = self._match_text_seq("OVERRIDE") 2036 2037 return self.expression( 2038 exp.LockingProperty, 2039 this=this, 2040 kind=kind, 2041 for_or_in=for_or_in, 2042 lock_type=lock_type, 2043 override=override, 2044 ) 2045 2046 def _parse_partition_by(self) -> t.List[exp.Expression]: 2047 if self._match(TokenType.PARTITION_BY): 2048 return self._parse_csv(self._parse_conjunction) 2049 return [] 2050 2051 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2052 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2053 if self._match_text_seq("MINVALUE"): 2054 return exp.var("MINVALUE") 2055 if self._match_text_seq("MAXVALUE"): 2056 return exp.var("MAXVALUE") 2057 return self._parse_bitwise() 2058 2059 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2060 expression = None 2061 from_expressions = None 2062 to_expressions = None 2063 2064 if self._match(TokenType.IN): 2065 this = self._parse_wrapped_csv(self._parse_bitwise) 2066 elif self._match(TokenType.FROM): 2067 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2068 self._match_text_seq("TO") 2069 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2070 elif self._match_text_seq("WITH", "(", "MODULUS"): 2071 this = self._parse_number() 2072 self._match_text_seq(",", "REMAINDER") 2073 expression = self._parse_number() 2074 self._match_r_paren() 2075 else: 2076 self.raise_error("Failed to parse partition bound spec.") 2077 2078 return self.expression( 2079 exp.PartitionBoundSpec, 2080 this=this, 2081 expression=expression, 2082 from_expressions=from_expressions, 2083 to_expressions=to_expressions, 2084 ) 2085 2086 # https://www.postgresql.org/docs/current/sql-createtable.html 2087 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2088 if not self._match_text_seq("OF"): 2089 self._retreat(self._index - 1) 2090 return None 2091 2092 this = self._parse_table(schema=True) 2093 2094 if self._match(TokenType.DEFAULT): 2095 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2096 elif self._match_text_seq("FOR", "VALUES"): 2097 expression = self._parse_partition_bound_spec() 2098 else: 2099 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2100 2101 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2102 2103 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2104 self._match(TokenType.EQ) 2105 return self.expression( 2106 exp.PartitionedByProperty, 2107 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2108 ) 2109 2110 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2111 if self._match_text_seq("AND", "STATISTICS"): 2112 statistics = True 2113 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2114 statistics = False 2115 else: 2116 statistics = None 2117 2118 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2119 2120 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2121 if self._match_text_seq("SQL"): 2122 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2123 return None 2124 2125 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2126 if self._match_text_seq("SQL", "DATA"): 2127 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2128 return None 2129 2130 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2131 if self._match_text_seq("PRIMARY", "INDEX"): 2132 return exp.NoPrimaryIndexProperty() 2133 if self._match_text_seq("SQL"): 2134 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2135 return None 2136 2137 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2138 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2139 return exp.OnCommitProperty() 2140 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2141 return exp.OnCommitProperty(delete=True) 2142 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2143 2144 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2145 if self._match_text_seq("SQL", "DATA"): 2146 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2147 return None 2148 2149 def _parse_distkey(self) -> exp.DistKeyProperty: 2150 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2151 2152 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2153 table = self._parse_table(schema=True) 2154 2155 options = [] 2156 while self._match_texts(("INCLUDING", "EXCLUDING")): 2157 this = self._prev.text.upper() 2158 2159 id_var = self._parse_id_var() 2160 if not id_var: 2161 return None 2162 2163 options.append( 2164 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2165 ) 2166 2167 return self.expression(exp.LikeProperty, this=table, expressions=options) 2168 2169 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2170 return self.expression( 2171 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2172 ) 2173 2174 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2175 self._match(TokenType.EQ) 2176 return self.expression( 2177 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2178 ) 2179 2180 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2181 self._match_text_seq("WITH", "CONNECTION") 2182 return self.expression( 2183 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2184 ) 2185 2186 def _parse_returns(self) -> exp.ReturnsProperty: 2187 value: t.Optional[exp.Expression] 2188 is_table = self._match(TokenType.TABLE) 2189 2190 if is_table: 2191 if self._match(TokenType.LT): 2192 value = self.expression( 2193 exp.Schema, 2194 this="TABLE", 2195 expressions=self._parse_csv(self._parse_struct_types), 2196 ) 2197 if not self._match(TokenType.GT): 2198 self.raise_error("Expecting >") 2199 else: 2200 value = self._parse_schema(exp.var("TABLE")) 2201 else: 2202 value = self._parse_types() 2203 2204 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2205 2206 def _parse_describe(self) -> exp.Describe: 2207 kind = self._match_set(self.CREATABLES) and self._prev.text 2208 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2209 if not self._match_set(self.ID_VAR_TOKENS, advance=False): 2210 style = None 2211 self._retreat(self._index - 1) 2212 this = self._parse_table(schema=True) 2213 properties = self._parse_properties() 2214 expressions = properties.expressions if properties else None 2215 return self.expression( 2216 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2217 ) 2218 2219 def _parse_insert(self) -> exp.Insert: 2220 comments = ensure_list(self._prev_comments) 2221 hint = self._parse_hint() 2222 overwrite = self._match(TokenType.OVERWRITE) 2223 ignore = self._match(TokenType.IGNORE) 2224 local = self._match_text_seq("LOCAL") 2225 alternative = None 2226 is_function = None 2227 2228 if self._match_text_seq("DIRECTORY"): 2229 this: t.Optional[exp.Expression] = self.expression( 2230 exp.Directory, 2231 this=self._parse_var_or_string(), 2232 local=local, 2233 row_format=self._parse_row_format(match_row=True), 2234 ) 2235 else: 2236 if self._match(TokenType.OR): 2237 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2238 2239 self._match(TokenType.INTO) 2240 comments += ensure_list(self._prev_comments) 2241 self._match(TokenType.TABLE) 2242 is_function = self._match(TokenType.FUNCTION) 2243 2244 this = ( 2245 self._parse_table(schema=True, parse_partition=True) 2246 if not is_function 2247 else self._parse_function() 2248 ) 2249 2250 returning = self._parse_returning() 2251 2252 return self.expression( 2253 exp.Insert, 2254 comments=comments, 2255 hint=hint, 2256 is_function=is_function, 2257 this=this, 2258 stored=self._match_text_seq("STORED") and self._parse_stored(), 2259 by_name=self._match_text_seq("BY", "NAME"), 2260 exists=self._parse_exists(), 2261 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2262 and self._parse_conjunction(), 2263 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2264 conflict=self._parse_on_conflict(), 2265 returning=returning or self._parse_returning(), 2266 overwrite=overwrite, 2267 alternative=alternative, 2268 ignore=ignore, 2269 ) 2270 2271 def _parse_kill(self) -> exp.Kill: 2272 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2273 2274 return self.expression( 2275 exp.Kill, 2276 this=self._parse_primary(), 2277 kind=kind, 2278 ) 2279 2280 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2281 conflict = self._match_text_seq("ON", "CONFLICT") 2282 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2283 2284 if not conflict and not duplicate: 2285 return None 2286 2287 conflict_keys = None 2288 constraint = None 2289 2290 if conflict: 2291 if self._match_text_seq("ON", "CONSTRAINT"): 2292 constraint = self._parse_id_var() 2293 elif self._match(TokenType.L_PAREN): 2294 conflict_keys = self._parse_csv(self._parse_id_var) 2295 self._match_r_paren() 2296 2297 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2298 if self._prev.token_type == TokenType.UPDATE: 2299 self._match(TokenType.SET) 2300 expressions = self._parse_csv(self._parse_equality) 2301 else: 2302 expressions = None 2303 2304 return self.expression( 2305 exp.OnConflict, 2306 duplicate=duplicate, 2307 expressions=expressions, 2308 action=action, 2309 conflict_keys=conflict_keys, 2310 constraint=constraint, 2311 ) 2312 2313 def _parse_returning(self) -> t.Optional[exp.Returning]: 2314 if not self._match(TokenType.RETURNING): 2315 return None 2316 return self.expression( 2317 exp.Returning, 2318 expressions=self._parse_csv(self._parse_expression), 2319 into=self._match(TokenType.INTO) and self._parse_table_part(), 2320 ) 2321 2322 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2323 if not self._match(TokenType.FORMAT): 2324 return None 2325 return self._parse_row_format() 2326 2327 def _parse_row_format( 2328 self, match_row: bool = False 2329 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2330 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2331 return None 2332 2333 if self._match_text_seq("SERDE"): 2334 this = self._parse_string() 2335 2336 serde_properties = None 2337 if self._match(TokenType.SERDE_PROPERTIES): 2338 serde_properties = self.expression( 2339 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2340 ) 2341 2342 return self.expression( 2343 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2344 ) 2345 2346 self._match_text_seq("DELIMITED") 2347 2348 kwargs = {} 2349 2350 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2351 kwargs["fields"] = self._parse_string() 2352 if self._match_text_seq("ESCAPED", "BY"): 2353 kwargs["escaped"] = self._parse_string() 2354 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2355 kwargs["collection_items"] = self._parse_string() 2356 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2357 kwargs["map_keys"] = self._parse_string() 2358 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2359 kwargs["lines"] = self._parse_string() 2360 if self._match_text_seq("NULL", "DEFINED", "AS"): 2361 kwargs["null"] = self._parse_string() 2362 2363 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2364 2365 def _parse_load(self) -> exp.LoadData | exp.Command: 2366 if self._match_text_seq("DATA"): 2367 local = self._match_text_seq("LOCAL") 2368 self._match_text_seq("INPATH") 2369 inpath = self._parse_string() 2370 overwrite = self._match(TokenType.OVERWRITE) 2371 self._match_pair(TokenType.INTO, TokenType.TABLE) 2372 2373 return self.expression( 2374 exp.LoadData, 2375 this=self._parse_table(schema=True), 2376 local=local, 2377 overwrite=overwrite, 2378 inpath=inpath, 2379 partition=self._parse_partition(), 2380 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2381 serde=self._match_text_seq("SERDE") and self._parse_string(), 2382 ) 2383 return self._parse_as_command(self._prev) 2384 2385 def _parse_delete(self) -> exp.Delete: 2386 # This handles MySQL's "Multiple-Table Syntax" 2387 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2388 tables = None 2389 comments = self._prev_comments 2390 if not self._match(TokenType.FROM, advance=False): 2391 tables = self._parse_csv(self._parse_table) or None 2392 2393 returning = self._parse_returning() 2394 2395 return self.expression( 2396 exp.Delete, 2397 comments=comments, 2398 tables=tables, 2399 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2400 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2401 where=self._parse_where(), 2402 returning=returning or self._parse_returning(), 2403 limit=self._parse_limit(), 2404 ) 2405 2406 def _parse_update(self) -> exp.Update: 2407 comments = self._prev_comments 2408 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2409 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2410 returning = self._parse_returning() 2411 return self.expression( 2412 exp.Update, 2413 comments=comments, 2414 **{ # type: ignore 2415 "this": this, 2416 "expressions": expressions, 2417 "from": self._parse_from(joins=True), 2418 "where": self._parse_where(), 2419 "returning": returning or self._parse_returning(), 2420 "order": self._parse_order(), 2421 "limit": self._parse_limit(), 2422 }, 2423 ) 2424 2425 def _parse_uncache(self) -> exp.Uncache: 2426 if not self._match(TokenType.TABLE): 2427 self.raise_error("Expecting TABLE after UNCACHE") 2428 2429 return self.expression( 2430 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2431 ) 2432 2433 def _parse_cache(self) -> exp.Cache: 2434 lazy = self._match_text_seq("LAZY") 2435 self._match(TokenType.TABLE) 2436 table = self._parse_table(schema=True) 2437 2438 options = [] 2439 if self._match_text_seq("OPTIONS"): 2440 self._match_l_paren() 2441 k = self._parse_string() 2442 self._match(TokenType.EQ) 2443 v = self._parse_string() 2444 options = [k, v] 2445 self._match_r_paren() 2446 2447 self._match(TokenType.ALIAS) 2448 return self.expression( 2449 exp.Cache, 2450 this=table, 2451 lazy=lazy, 2452 options=options, 2453 expression=self._parse_select(nested=True), 2454 ) 2455 2456 def _parse_partition(self) -> t.Optional[exp.Partition]: 2457 if not self._match(TokenType.PARTITION): 2458 return None 2459 2460 return self.expression( 2461 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2462 ) 2463 2464 def _parse_value(self) -> exp.Tuple: 2465 if self._match(TokenType.L_PAREN): 2466 expressions = self._parse_csv(self._parse_expression) 2467 self._match_r_paren() 2468 return self.expression(exp.Tuple, expressions=expressions) 2469 2470 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2471 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2472 2473 def _parse_projections(self) -> t.List[exp.Expression]: 2474 return self._parse_expressions() 2475 2476 def _parse_select( 2477 self, 2478 nested: bool = False, 2479 table: bool = False, 2480 parse_subquery_alias: bool = True, 2481 parse_set_operation: bool = True, 2482 ) -> t.Optional[exp.Expression]: 2483 cte = self._parse_with() 2484 2485 if cte: 2486 this = self._parse_statement() 2487 2488 if not this: 2489 self.raise_error("Failed to parse any statement following CTE") 2490 return cte 2491 2492 if "with" in this.arg_types: 2493 this.set("with", cte) 2494 else: 2495 self.raise_error(f"{this.key} does not support CTE") 2496 this = cte 2497 2498 return this 2499 2500 # duckdb supports leading with FROM x 2501 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2502 2503 if self._match(TokenType.SELECT): 2504 comments = self._prev_comments 2505 2506 hint = self._parse_hint() 2507 all_ = self._match(TokenType.ALL) 2508 distinct = self._match_set(self.DISTINCT_TOKENS) 2509 2510 kind = ( 2511 self._match(TokenType.ALIAS) 2512 and self._match_texts(("STRUCT", "VALUE")) 2513 and self._prev.text.upper() 2514 ) 2515 2516 if distinct: 2517 distinct = self.expression( 2518 exp.Distinct, 2519 on=self._parse_value() if self._match(TokenType.ON) else None, 2520 ) 2521 2522 if all_ and distinct: 2523 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2524 2525 limit = self._parse_limit(top=True) 2526 projections = self._parse_projections() 2527 2528 this = self.expression( 2529 exp.Select, 2530 kind=kind, 2531 hint=hint, 2532 distinct=distinct, 2533 expressions=projections, 2534 limit=limit, 2535 ) 2536 this.comments = comments 2537 2538 into = self._parse_into() 2539 if into: 2540 this.set("into", into) 2541 2542 if not from_: 2543 from_ = self._parse_from() 2544 2545 if from_: 2546 this.set("from", from_) 2547 2548 this = self._parse_query_modifiers(this) 2549 elif (table or nested) and self._match(TokenType.L_PAREN): 2550 if self._match(TokenType.PIVOT): 2551 this = self._parse_simplified_pivot() 2552 elif self._match(TokenType.FROM): 2553 this = exp.select("*").from_( 2554 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2555 ) 2556 else: 2557 this = ( 2558 self._parse_table() 2559 if table 2560 else self._parse_select(nested=True, parse_set_operation=False) 2561 ) 2562 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2563 2564 self._match_r_paren() 2565 2566 # We return early here so that the UNION isn't attached to the subquery by the 2567 # following call to _parse_set_operations, but instead becomes the parent node 2568 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2569 elif self._match(TokenType.VALUES, advance=False): 2570 this = self._parse_derived_table_values() 2571 elif from_: 2572 this = exp.select("*").from_(from_.this, copy=False) 2573 else: 2574 this = None 2575 2576 if parse_set_operation: 2577 return self._parse_set_operations(this) 2578 return this 2579 2580 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2581 if not skip_with_token and not self._match(TokenType.WITH): 2582 return None 2583 2584 comments = self._prev_comments 2585 recursive = self._match(TokenType.RECURSIVE) 2586 2587 expressions = [] 2588 while True: 2589 expressions.append(self._parse_cte()) 2590 2591 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2592 break 2593 else: 2594 self._match(TokenType.WITH) 2595 2596 return self.expression( 2597 exp.With, comments=comments, expressions=expressions, recursive=recursive 2598 ) 2599 2600 def _parse_cte(self) -> exp.CTE: 2601 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2602 if not alias or not alias.this: 2603 self.raise_error("Expected CTE to have alias") 2604 2605 self._match(TokenType.ALIAS) 2606 2607 if self._match_text_seq("NOT", "MATERIALIZED"): 2608 materialized = False 2609 elif self._match_text_seq("MATERIALIZED"): 2610 materialized = True 2611 else: 2612 materialized = None 2613 2614 return self.expression( 2615 exp.CTE, 2616 this=self._parse_wrapped(self._parse_statement), 2617 alias=alias, 2618 materialized=materialized, 2619 ) 2620 2621 def _parse_table_alias( 2622 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2623 ) -> t.Optional[exp.TableAlias]: 2624 any_token = self._match(TokenType.ALIAS) 2625 alias = ( 2626 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2627 or self._parse_string_as_identifier() 2628 ) 2629 2630 index = self._index 2631 if self._match(TokenType.L_PAREN): 2632 columns = self._parse_csv(self._parse_function_parameter) 2633 self._match_r_paren() if columns else self._retreat(index) 2634 else: 2635 columns = None 2636 2637 if not alias and not columns: 2638 return None 2639 2640 return self.expression(exp.TableAlias, this=alias, columns=columns) 2641 2642 def _parse_subquery( 2643 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2644 ) -> t.Optional[exp.Subquery]: 2645 if not this: 2646 return None 2647 2648 return self.expression( 2649 exp.Subquery, 2650 this=this, 2651 pivots=self._parse_pivots(), 2652 alias=self._parse_table_alias() if parse_alias else None, 2653 ) 2654 2655 def _implicit_unnests_to_explicit(self, this: E) -> E: 2656 from sqlglot.optimizer.normalize_identifiers import ( 2657 normalize_identifiers as _norm, 2658 ) 2659 2660 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2661 for i, join in enumerate(this.args.get("joins") or []): 2662 table = join.this 2663 normalized_table = table.copy() 2664 normalized_table.meta["maybe_column"] = True 2665 normalized_table = _norm(normalized_table, dialect=self.dialect) 2666 2667 if isinstance(table, exp.Table) and not join.args.get("on"): 2668 if normalized_table.parts[0].name in refs: 2669 table_as_column = table.to_column() 2670 unnest = exp.Unnest(expressions=[table_as_column]) 2671 2672 # Table.to_column creates a parent Alias node that we want to convert to 2673 # a TableAlias and attach to the Unnest, so it matches the parser's output 2674 if isinstance(table.args.get("alias"), exp.TableAlias): 2675 table_as_column.replace(table_as_column.this) 2676 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2677 2678 table.replace(unnest) 2679 2680 refs.add(normalized_table.alias_or_name) 2681 2682 return this 2683 2684 def _parse_query_modifiers( 2685 self, this: t.Optional[exp.Expression] 2686 ) -> t.Optional[exp.Expression]: 2687 if isinstance(this, (exp.Query, exp.Table)): 2688 for join in self._parse_joins(): 2689 this.append("joins", join) 2690 for lateral in iter(self._parse_lateral, None): 2691 this.append("laterals", lateral) 2692 2693 while True: 2694 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2695 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2696 key, expression = parser(self) 2697 2698 if expression: 2699 this.set(key, expression) 2700 if key == "limit": 2701 offset = expression.args.pop("offset", None) 2702 2703 if offset: 2704 offset = exp.Offset(expression=offset) 2705 this.set("offset", offset) 2706 2707 limit_by_expressions = expression.expressions 2708 expression.set("expressions", None) 2709 offset.set("expressions", limit_by_expressions) 2710 continue 2711 break 2712 2713 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2714 this = self._implicit_unnests_to_explicit(this) 2715 2716 return this 2717 2718 def _parse_hint(self) -> t.Optional[exp.Hint]: 2719 if self._match(TokenType.HINT): 2720 hints = [] 2721 for hint in iter( 2722 lambda: self._parse_csv( 2723 lambda: self._parse_function() or self._parse_var(upper=True) 2724 ), 2725 [], 2726 ): 2727 hints.extend(hint) 2728 2729 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2730 self.raise_error("Expected */ after HINT") 2731 2732 return self.expression(exp.Hint, expressions=hints) 2733 2734 return None 2735 2736 def _parse_into(self) -> t.Optional[exp.Into]: 2737 if not self._match(TokenType.INTO): 2738 return None 2739 2740 temp = self._match(TokenType.TEMPORARY) 2741 unlogged = self._match_text_seq("UNLOGGED") 2742 self._match(TokenType.TABLE) 2743 2744 return self.expression( 2745 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2746 ) 2747 2748 def _parse_from( 2749 self, joins: bool = False, skip_from_token: bool = False 2750 ) -> t.Optional[exp.From]: 2751 if not skip_from_token and not self._match(TokenType.FROM): 2752 return None 2753 2754 return self.expression( 2755 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2756 ) 2757 2758 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2759 return self.expression( 2760 exp.MatchRecognizeMeasure, 2761 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2762 this=self._parse_expression(), 2763 ) 2764 2765 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2766 if not self._match(TokenType.MATCH_RECOGNIZE): 2767 return None 2768 2769 self._match_l_paren() 2770 2771 partition = self._parse_partition_by() 2772 order = self._parse_order() 2773 2774 measures = ( 2775 self._parse_csv(self._parse_match_recognize_measure) 2776 if self._match_text_seq("MEASURES") 2777 else None 2778 ) 2779 2780 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2781 rows = exp.var("ONE ROW PER MATCH") 2782 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2783 text = "ALL ROWS PER MATCH" 2784 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2785 text += " SHOW EMPTY MATCHES" 2786 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2787 text += " OMIT EMPTY MATCHES" 2788 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2789 text += " WITH UNMATCHED ROWS" 2790 rows = exp.var(text) 2791 else: 2792 rows = None 2793 2794 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2795 text = "AFTER MATCH SKIP" 2796 if self._match_text_seq("PAST", "LAST", "ROW"): 2797 text += " PAST LAST ROW" 2798 elif self._match_text_seq("TO", "NEXT", "ROW"): 2799 text += " TO NEXT ROW" 2800 elif self._match_text_seq("TO", "FIRST"): 2801 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2802 elif self._match_text_seq("TO", "LAST"): 2803 text += f" TO LAST {self._advance_any().text}" # type: ignore 2804 after = exp.var(text) 2805 else: 2806 after = None 2807 2808 if self._match_text_seq("PATTERN"): 2809 self._match_l_paren() 2810 2811 if not self._curr: 2812 self.raise_error("Expecting )", self._curr) 2813 2814 paren = 1 2815 start = self._curr 2816 2817 while self._curr and paren > 0: 2818 if self._curr.token_type == TokenType.L_PAREN: 2819 paren += 1 2820 if self._curr.token_type == TokenType.R_PAREN: 2821 paren -= 1 2822 2823 end = self._prev 2824 self._advance() 2825 2826 if paren > 0: 2827 self.raise_error("Expecting )", self._curr) 2828 2829 pattern = exp.var(self._find_sql(start, end)) 2830 else: 2831 pattern = None 2832 2833 define = ( 2834 self._parse_csv(self._parse_name_as_expression) 2835 if self._match_text_seq("DEFINE") 2836 else None 2837 ) 2838 2839 self._match_r_paren() 2840 2841 return self.expression( 2842 exp.MatchRecognize, 2843 partition_by=partition, 2844 order=order, 2845 measures=measures, 2846 rows=rows, 2847 after=after, 2848 pattern=pattern, 2849 define=define, 2850 alias=self._parse_table_alias(), 2851 ) 2852 2853 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2854 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2855 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2856 cross_apply = False 2857 2858 if cross_apply is not None: 2859 this = self._parse_select(table=True) 2860 view = None 2861 outer = None 2862 elif self._match(TokenType.LATERAL): 2863 this = self._parse_select(table=True) 2864 view = self._match(TokenType.VIEW) 2865 outer = self._match(TokenType.OUTER) 2866 else: 2867 return None 2868 2869 if not this: 2870 this = ( 2871 self._parse_unnest() 2872 or self._parse_function() 2873 or self._parse_id_var(any_token=False) 2874 ) 2875 2876 while self._match(TokenType.DOT): 2877 this = exp.Dot( 2878 this=this, 2879 expression=self._parse_function() or self._parse_id_var(any_token=False), 2880 ) 2881 2882 if view: 2883 table = self._parse_id_var(any_token=False) 2884 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2885 table_alias: t.Optional[exp.TableAlias] = self.expression( 2886 exp.TableAlias, this=table, columns=columns 2887 ) 2888 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2889 # We move the alias from the lateral's child node to the lateral itself 2890 table_alias = this.args["alias"].pop() 2891 else: 2892 table_alias = self._parse_table_alias() 2893 2894 return self.expression( 2895 exp.Lateral, 2896 this=this, 2897 view=view, 2898 outer=outer, 2899 alias=table_alias, 2900 cross_apply=cross_apply, 2901 ) 2902 2903 def _parse_join_parts( 2904 self, 2905 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2906 return ( 2907 self._match_set(self.JOIN_METHODS) and self._prev, 2908 self._match_set(self.JOIN_SIDES) and self._prev, 2909 self._match_set(self.JOIN_KINDS) and self._prev, 2910 ) 2911 2912 def _parse_join( 2913 self, skip_join_token: bool = False, parse_bracket: bool = False 2914 ) -> t.Optional[exp.Join]: 2915 if self._match(TokenType.COMMA): 2916 return self.expression(exp.Join, this=self._parse_table()) 2917 2918 index = self._index 2919 method, side, kind = self._parse_join_parts() 2920 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2921 join = self._match(TokenType.JOIN) 2922 2923 if not skip_join_token and not join: 2924 self._retreat(index) 2925 kind = None 2926 method = None 2927 side = None 2928 2929 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2930 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2931 2932 if not skip_join_token and not join and not outer_apply and not cross_apply: 2933 return None 2934 2935 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2936 2937 if method: 2938 kwargs["method"] = method.text 2939 if side: 2940 kwargs["side"] = side.text 2941 if kind: 2942 kwargs["kind"] = kind.text 2943 if hint: 2944 kwargs["hint"] = hint 2945 2946 if self._match(TokenType.MATCH_CONDITION): 2947 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2948 2949 if self._match(TokenType.ON): 2950 kwargs["on"] = self._parse_conjunction() 2951 elif self._match(TokenType.USING): 2952 kwargs["using"] = self._parse_wrapped_id_vars() 2953 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2954 kind and kind.token_type == TokenType.CROSS 2955 ): 2956 index = self._index 2957 joins: t.Optional[list] = list(self._parse_joins()) 2958 2959 if joins and self._match(TokenType.ON): 2960 kwargs["on"] = self._parse_conjunction() 2961 elif joins and self._match(TokenType.USING): 2962 kwargs["using"] = self._parse_wrapped_id_vars() 2963 else: 2964 joins = None 2965 self._retreat(index) 2966 2967 kwargs["this"].set("joins", joins if joins else None) 2968 2969 comments = [c for token in (method, side, kind) if token for c in token.comments] 2970 return self.expression(exp.Join, comments=comments, **kwargs) 2971 2972 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2973 this = self._parse_conjunction() 2974 2975 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2976 return this 2977 2978 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2979 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2980 2981 return this 2982 2983 def _parse_index_params(self) -> exp.IndexParameters: 2984 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2985 2986 if self._match(TokenType.L_PAREN, advance=False): 2987 columns = self._parse_wrapped_csv(self._parse_with_operator) 2988 else: 2989 columns = None 2990 2991 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2992 partition_by = self._parse_partition_by() 2993 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2994 tablespace = ( 2995 self._parse_var(any_token=True) 2996 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2997 else None 2998 ) 2999 where = self._parse_where() 3000 3001 return self.expression( 3002 exp.IndexParameters, 3003 using=using, 3004 columns=columns, 3005 include=include, 3006 partition_by=partition_by, 3007 where=where, 3008 with_storage=with_storage, 3009 tablespace=tablespace, 3010 ) 3011 3012 def _parse_index( 3013 self, 3014 index: t.Optional[exp.Expression] = None, 3015 ) -> t.Optional[exp.Index]: 3016 if index: 3017 unique = None 3018 primary = None 3019 amp = None 3020 3021 self._match(TokenType.ON) 3022 self._match(TokenType.TABLE) # hive 3023 table = self._parse_table_parts(schema=True) 3024 else: 3025 unique = self._match(TokenType.UNIQUE) 3026 primary = self._match_text_seq("PRIMARY") 3027 amp = self._match_text_seq("AMP") 3028 3029 if not self._match(TokenType.INDEX): 3030 return None 3031 3032 index = self._parse_id_var() 3033 table = None 3034 3035 params = self._parse_index_params() 3036 3037 return self.expression( 3038 exp.Index, 3039 this=index, 3040 table=table, 3041 unique=unique, 3042 primary=primary, 3043 amp=amp, 3044 params=params, 3045 ) 3046 3047 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3048 hints: t.List[exp.Expression] = [] 3049 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3050 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3051 hints.append( 3052 self.expression( 3053 exp.WithTableHint, 3054 expressions=self._parse_csv( 3055 lambda: self._parse_function() or self._parse_var(any_token=True) 3056 ), 3057 ) 3058 ) 3059 self._match_r_paren() 3060 else: 3061 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3062 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3063 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3064 3065 self._match_texts(("INDEX", "KEY")) 3066 if self._match(TokenType.FOR): 3067 hint.set("target", self._advance_any() and self._prev.text.upper()) 3068 3069 hint.set("expressions", self._parse_wrapped_id_vars()) 3070 hints.append(hint) 3071 3072 return hints or None 3073 3074 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3075 return ( 3076 (not schema and self._parse_function(optional_parens=False)) 3077 or self._parse_id_var(any_token=False) 3078 or self._parse_string_as_identifier() 3079 or self._parse_placeholder() 3080 ) 3081 3082 def _parse_table_parts( 3083 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3084 ) -> exp.Table: 3085 catalog = None 3086 db = None 3087 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3088 3089 while self._match(TokenType.DOT): 3090 if catalog: 3091 # This allows nesting the table in arbitrarily many dot expressions if needed 3092 table = self.expression( 3093 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3094 ) 3095 else: 3096 catalog = db 3097 db = table 3098 # "" used for tsql FROM a..b case 3099 table = self._parse_table_part(schema=schema) or "" 3100 3101 if ( 3102 wildcard 3103 and self._is_connected() 3104 and (isinstance(table, exp.Identifier) or not table) 3105 and self._match(TokenType.STAR) 3106 ): 3107 if isinstance(table, exp.Identifier): 3108 table.args["this"] += "*" 3109 else: 3110 table = exp.Identifier(this="*") 3111 3112 if is_db_reference: 3113 catalog = db 3114 db = table 3115 table = None 3116 3117 if not table and not is_db_reference: 3118 self.raise_error(f"Expected table name but got {self._curr}") 3119 if not db and is_db_reference: 3120 self.raise_error(f"Expected database name but got {self._curr}") 3121 3122 return self.expression( 3123 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3124 ) 3125 3126 def _parse_table( 3127 self, 3128 schema: bool = False, 3129 joins: bool = False, 3130 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3131 parse_bracket: bool = False, 3132 is_db_reference: bool = False, 3133 parse_partition: bool = False, 3134 ) -> t.Optional[exp.Expression]: 3135 lateral = self._parse_lateral() 3136 if lateral: 3137 return lateral 3138 3139 unnest = self._parse_unnest() 3140 if unnest: 3141 return unnest 3142 3143 values = self._parse_derived_table_values() 3144 if values: 3145 return values 3146 3147 subquery = self._parse_select(table=True) 3148 if subquery: 3149 if not subquery.args.get("pivots"): 3150 subquery.set("pivots", self._parse_pivots()) 3151 return subquery 3152 3153 bracket = parse_bracket and self._parse_bracket(None) 3154 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3155 3156 only = self._match(TokenType.ONLY) 3157 3158 this = t.cast( 3159 exp.Expression, 3160 bracket 3161 or self._parse_bracket( 3162 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3163 ), 3164 ) 3165 3166 if only: 3167 this.set("only", only) 3168 3169 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3170 self._match_text_seq("*") 3171 3172 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3173 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3174 this.set("partition", self._parse_partition()) 3175 3176 if schema: 3177 return self._parse_schema(this=this) 3178 3179 version = self._parse_version() 3180 3181 if version: 3182 this.set("version", version) 3183 3184 if self.dialect.ALIAS_POST_TABLESAMPLE: 3185 table_sample = self._parse_table_sample() 3186 3187 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3188 if alias: 3189 this.set("alias", alias) 3190 3191 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3192 return self.expression( 3193 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3194 ) 3195 3196 this.set("hints", self._parse_table_hints()) 3197 3198 if not this.args.get("pivots"): 3199 this.set("pivots", self._parse_pivots()) 3200 3201 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3202 table_sample = self._parse_table_sample() 3203 3204 if table_sample: 3205 table_sample.set("this", this) 3206 this = table_sample 3207 3208 if joins: 3209 for join in self._parse_joins(): 3210 this.append("joins", join) 3211 3212 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3213 this.set("ordinality", True) 3214 this.set("alias", self._parse_table_alias()) 3215 3216 return this 3217 3218 def _parse_version(self) -> t.Optional[exp.Version]: 3219 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3220 this = "TIMESTAMP" 3221 elif self._match(TokenType.VERSION_SNAPSHOT): 3222 this = "VERSION" 3223 else: 3224 return None 3225 3226 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3227 kind = self._prev.text.upper() 3228 start = self._parse_bitwise() 3229 self._match_texts(("TO", "AND")) 3230 end = self._parse_bitwise() 3231 expression: t.Optional[exp.Expression] = self.expression( 3232 exp.Tuple, expressions=[start, end] 3233 ) 3234 elif self._match_text_seq("CONTAINED", "IN"): 3235 kind = "CONTAINED IN" 3236 expression = self.expression( 3237 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3238 ) 3239 elif self._match(TokenType.ALL): 3240 kind = "ALL" 3241 expression = None 3242 else: 3243 self._match_text_seq("AS", "OF") 3244 kind = "AS OF" 3245 expression = self._parse_type() 3246 3247 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3248 3249 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3250 if not self._match(TokenType.UNNEST): 3251 return None 3252 3253 expressions = self._parse_wrapped_csv(self._parse_equality) 3254 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3255 3256 alias = self._parse_table_alias() if with_alias else None 3257 3258 if alias: 3259 if self.dialect.UNNEST_COLUMN_ONLY: 3260 if alias.args.get("columns"): 3261 self.raise_error("Unexpected extra column alias in unnest.") 3262 3263 alias.set("columns", [alias.this]) 3264 alias.set("this", None) 3265 3266 columns = alias.args.get("columns") or [] 3267 if offset and len(expressions) < len(columns): 3268 offset = columns.pop() 3269 3270 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3271 self._match(TokenType.ALIAS) 3272 offset = self._parse_id_var( 3273 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3274 ) or exp.to_identifier("offset") 3275 3276 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3277 3278 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3279 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3280 if not is_derived and not self._match_text_seq("VALUES"): 3281 return None 3282 3283 expressions = self._parse_csv(self._parse_value) 3284 alias = self._parse_table_alias() 3285 3286 if is_derived: 3287 self._match_r_paren() 3288 3289 return self.expression( 3290 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3291 ) 3292 3293 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3294 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3295 as_modifier and self._match_text_seq("USING", "SAMPLE") 3296 ): 3297 return None 3298 3299 bucket_numerator = None 3300 bucket_denominator = None 3301 bucket_field = None 3302 percent = None 3303 size = None 3304 seed = None 3305 3306 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3307 matched_l_paren = self._match(TokenType.L_PAREN) 3308 3309 if self.TABLESAMPLE_CSV: 3310 num = None 3311 expressions = self._parse_csv(self._parse_primary) 3312 else: 3313 expressions = None 3314 num = ( 3315 self._parse_factor() 3316 if self._match(TokenType.NUMBER, advance=False) 3317 else self._parse_primary() or self._parse_placeholder() 3318 ) 3319 3320 if self._match_text_seq("BUCKET"): 3321 bucket_numerator = self._parse_number() 3322 self._match_text_seq("OUT", "OF") 3323 bucket_denominator = bucket_denominator = self._parse_number() 3324 self._match(TokenType.ON) 3325 bucket_field = self._parse_field() 3326 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3327 percent = num 3328 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3329 size = num 3330 else: 3331 percent = num 3332 3333 if matched_l_paren: 3334 self._match_r_paren() 3335 3336 if self._match(TokenType.L_PAREN): 3337 method = self._parse_var(upper=True) 3338 seed = self._match(TokenType.COMMA) and self._parse_number() 3339 self._match_r_paren() 3340 elif self._match_texts(("SEED", "REPEATABLE")): 3341 seed = self._parse_wrapped(self._parse_number) 3342 3343 return self.expression( 3344 exp.TableSample, 3345 expressions=expressions, 3346 method=method, 3347 bucket_numerator=bucket_numerator, 3348 bucket_denominator=bucket_denominator, 3349 bucket_field=bucket_field, 3350 percent=percent, 3351 size=size, 3352 seed=seed, 3353 ) 3354 3355 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3356 return list(iter(self._parse_pivot, None)) or None 3357 3358 def _parse_joins(self) -> t.Iterator[exp.Join]: 3359 return iter(self._parse_join, None) 3360 3361 # https://duckdb.org/docs/sql/statements/pivot 3362 def _parse_simplified_pivot(self) -> exp.Pivot: 3363 def _parse_on() -> t.Optional[exp.Expression]: 3364 this = self._parse_bitwise() 3365 return self._parse_in(this) if self._match(TokenType.IN) else this 3366 3367 this = self._parse_table() 3368 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3369 using = self._match(TokenType.USING) and self._parse_csv( 3370 lambda: self._parse_alias(self._parse_function()) 3371 ) 3372 group = self._parse_group() 3373 return self.expression( 3374 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3375 ) 3376 3377 def _parse_pivot_in(self) -> exp.In: 3378 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3379 this = self._parse_conjunction() 3380 3381 self._match(TokenType.ALIAS) 3382 alias = self._parse_field() 3383 if alias: 3384 return self.expression(exp.PivotAlias, this=this, alias=alias) 3385 3386 return this 3387 3388 value = self._parse_column() 3389 3390 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3391 self.raise_error("Expecting IN (") 3392 3393 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3394 3395 self._match_r_paren() 3396 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3397 3398 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3399 index = self._index 3400 include_nulls = None 3401 3402 if self._match(TokenType.PIVOT): 3403 unpivot = False 3404 elif self._match(TokenType.UNPIVOT): 3405 unpivot = True 3406 3407 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3408 if self._match_text_seq("INCLUDE", "NULLS"): 3409 include_nulls = True 3410 elif self._match_text_seq("EXCLUDE", "NULLS"): 3411 include_nulls = False 3412 else: 3413 return None 3414 3415 expressions = [] 3416 3417 if not self._match(TokenType.L_PAREN): 3418 self._retreat(index) 3419 return None 3420 3421 if unpivot: 3422 expressions = self._parse_csv(self._parse_column) 3423 else: 3424 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3425 3426 if not expressions: 3427 self.raise_error("Failed to parse PIVOT's aggregation list") 3428 3429 if not self._match(TokenType.FOR): 3430 self.raise_error("Expecting FOR") 3431 3432 field = self._parse_pivot_in() 3433 3434 self._match_r_paren() 3435 3436 pivot = self.expression( 3437 exp.Pivot, 3438 expressions=expressions, 3439 field=field, 3440 unpivot=unpivot, 3441 include_nulls=include_nulls, 3442 ) 3443 3444 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3445 pivot.set("alias", self._parse_table_alias()) 3446 3447 if not unpivot: 3448 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3449 3450 columns: t.List[exp.Expression] = [] 3451 for fld in pivot.args["field"].expressions: 3452 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3453 for name in names: 3454 if self.PREFIXED_PIVOT_COLUMNS: 3455 name = f"{name}_{field_name}" if name else field_name 3456 else: 3457 name = f"{field_name}_{name}" if name else field_name 3458 3459 columns.append(exp.to_identifier(name)) 3460 3461 pivot.set("columns", columns) 3462 3463 return pivot 3464 3465 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3466 return [agg.alias for agg in aggregations] 3467 3468 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3469 if not skip_where_token and not self._match(TokenType.PREWHERE): 3470 return None 3471 3472 return self.expression( 3473 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3474 ) 3475 3476 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3477 if not skip_where_token and not self._match(TokenType.WHERE): 3478 return None 3479 3480 return self.expression( 3481 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3482 ) 3483 3484 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3485 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3486 return None 3487 3488 elements: t.Dict[str, t.Any] = defaultdict(list) 3489 3490 if self._match(TokenType.ALL): 3491 elements["all"] = True 3492 elif self._match(TokenType.DISTINCT): 3493 elements["all"] = False 3494 3495 while True: 3496 expressions = self._parse_csv(self._parse_conjunction) 3497 if expressions: 3498 elements["expressions"].extend(expressions) 3499 3500 grouping_sets = self._parse_grouping_sets() 3501 if grouping_sets: 3502 elements["grouping_sets"].extend(grouping_sets) 3503 3504 rollup = None 3505 cube = None 3506 totals = None 3507 3508 index = self._index 3509 with_ = self._match(TokenType.WITH) 3510 if self._match(TokenType.ROLLUP): 3511 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3512 elements["rollup"].extend(ensure_list(rollup)) 3513 3514 if self._match(TokenType.CUBE): 3515 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3516 elements["cube"].extend(ensure_list(cube)) 3517 3518 if self._match_text_seq("TOTALS"): 3519 totals = True 3520 elements["totals"] = True # type: ignore 3521 3522 if not (grouping_sets or rollup or cube or totals): 3523 if with_: 3524 self._retreat(index) 3525 break 3526 3527 return self.expression(exp.Group, **elements) # type: ignore 3528 3529 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3530 if not self._match(TokenType.GROUPING_SETS): 3531 return None 3532 3533 return self._parse_wrapped_csv(self._parse_grouping_set) 3534 3535 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3536 if self._match(TokenType.L_PAREN): 3537 grouping_set = self._parse_csv(self._parse_column) 3538 self._match_r_paren() 3539 return self.expression(exp.Tuple, expressions=grouping_set) 3540 3541 return self._parse_column() 3542 3543 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3544 if not skip_having_token and not self._match(TokenType.HAVING): 3545 return None 3546 return self.expression(exp.Having, this=self._parse_conjunction()) 3547 3548 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3549 if not self._match(TokenType.QUALIFY): 3550 return None 3551 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3552 3553 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3554 if skip_start_token: 3555 start = None 3556 elif self._match(TokenType.START_WITH): 3557 start = self._parse_conjunction() 3558 else: 3559 return None 3560 3561 self._match(TokenType.CONNECT_BY) 3562 nocycle = self._match_text_seq("NOCYCLE") 3563 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3564 exp.Prior, this=self._parse_bitwise() 3565 ) 3566 connect = self._parse_conjunction() 3567 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3568 3569 if not start and self._match(TokenType.START_WITH): 3570 start = self._parse_conjunction() 3571 3572 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3573 3574 def _parse_name_as_expression(self) -> exp.Alias: 3575 return self.expression( 3576 exp.Alias, 3577 alias=self._parse_id_var(any_token=True), 3578 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3579 ) 3580 3581 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3582 if self._match_text_seq("INTERPOLATE"): 3583 return self._parse_wrapped_csv(self._parse_name_as_expression) 3584 return None 3585 3586 def _parse_order( 3587 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3588 ) -> t.Optional[exp.Expression]: 3589 siblings = None 3590 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3591 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3592 return this 3593 3594 siblings = True 3595 3596 return self.expression( 3597 exp.Order, 3598 this=this, 3599 expressions=self._parse_csv(self._parse_ordered), 3600 interpolate=self._parse_interpolate(), 3601 siblings=siblings, 3602 ) 3603 3604 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3605 if not self._match(token): 3606 return None 3607 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3608 3609 def _parse_ordered( 3610 self, parse_method: t.Optional[t.Callable] = None 3611 ) -> t.Optional[exp.Ordered]: 3612 this = parse_method() if parse_method else self._parse_conjunction() 3613 if not this: 3614 return None 3615 3616 asc = self._match(TokenType.ASC) 3617 desc = self._match(TokenType.DESC) or (asc and False) 3618 3619 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3620 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3621 3622 nulls_first = is_nulls_first or False 3623 explicitly_null_ordered = is_nulls_first or is_nulls_last 3624 3625 if ( 3626 not explicitly_null_ordered 3627 and ( 3628 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3629 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3630 ) 3631 and self.dialect.NULL_ORDERING != "nulls_are_last" 3632 ): 3633 nulls_first = True 3634 3635 if self._match_text_seq("WITH", "FILL"): 3636 with_fill = self.expression( 3637 exp.WithFill, 3638 **{ # type: ignore 3639 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3640 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3641 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3642 }, 3643 ) 3644 else: 3645 with_fill = None 3646 3647 return self.expression( 3648 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3649 ) 3650 3651 def _parse_limit( 3652 self, 3653 this: t.Optional[exp.Expression] = None, 3654 top: bool = False, 3655 skip_limit_token: bool = False, 3656 ) -> t.Optional[exp.Expression]: 3657 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3658 comments = self._prev_comments 3659 if top: 3660 limit_paren = self._match(TokenType.L_PAREN) 3661 expression = self._parse_term() if limit_paren else self._parse_number() 3662 3663 if limit_paren: 3664 self._match_r_paren() 3665 else: 3666 expression = self._parse_term() 3667 3668 if self._match(TokenType.COMMA): 3669 offset = expression 3670 expression = self._parse_term() 3671 else: 3672 offset = None 3673 3674 limit_exp = self.expression( 3675 exp.Limit, 3676 this=this, 3677 expression=expression, 3678 offset=offset, 3679 comments=comments, 3680 expressions=self._parse_limit_by(), 3681 ) 3682 3683 return limit_exp 3684 3685 if self._match(TokenType.FETCH): 3686 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3687 direction = self._prev.text.upper() if direction else "FIRST" 3688 3689 count = self._parse_field(tokens=self.FETCH_TOKENS) 3690 percent = self._match(TokenType.PERCENT) 3691 3692 self._match_set((TokenType.ROW, TokenType.ROWS)) 3693 3694 only = self._match_text_seq("ONLY") 3695 with_ties = self._match_text_seq("WITH", "TIES") 3696 3697 if only and with_ties: 3698 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3699 3700 return self.expression( 3701 exp.Fetch, 3702 direction=direction, 3703 count=count, 3704 percent=percent, 3705 with_ties=with_ties, 3706 ) 3707 3708 return this 3709 3710 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3711 if not self._match(TokenType.OFFSET): 3712 return this 3713 3714 count = self._parse_term() 3715 self._match_set((TokenType.ROW, TokenType.ROWS)) 3716 3717 return self.expression( 3718 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3719 ) 3720 3721 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3722 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3723 3724 def _parse_locks(self) -> t.List[exp.Lock]: 3725 locks = [] 3726 while True: 3727 if self._match_text_seq("FOR", "UPDATE"): 3728 update = True 3729 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3730 "LOCK", "IN", "SHARE", "MODE" 3731 ): 3732 update = False 3733 else: 3734 break 3735 3736 expressions = None 3737 if self._match_text_seq("OF"): 3738 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3739 3740 wait: t.Optional[bool | exp.Expression] = None 3741 if self._match_text_seq("NOWAIT"): 3742 wait = True 3743 elif self._match_text_seq("WAIT"): 3744 wait = self._parse_primary() 3745 elif self._match_text_seq("SKIP", "LOCKED"): 3746 wait = False 3747 3748 locks.append( 3749 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3750 ) 3751 3752 return locks 3753 3754 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3755 while this and self._match_set(self.SET_OPERATIONS): 3756 token_type = self._prev.token_type 3757 3758 if token_type == TokenType.UNION: 3759 operation = exp.Union 3760 elif token_type == TokenType.EXCEPT: 3761 operation = exp.Except 3762 else: 3763 operation = exp.Intersect 3764 3765 comments = self._prev.comments 3766 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3767 by_name = self._match_text_seq("BY", "NAME") 3768 expression = self._parse_select(nested=True, parse_set_operation=False) 3769 3770 this = self.expression( 3771 operation, 3772 comments=comments, 3773 this=this, 3774 distinct=distinct, 3775 by_name=by_name, 3776 expression=expression, 3777 ) 3778 3779 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3780 expression = this.expression 3781 3782 if expression: 3783 for arg in self.UNION_MODIFIERS: 3784 expr = expression.args.get(arg) 3785 if expr: 3786 this.set(arg, expr.pop()) 3787 3788 return this 3789 3790 def _parse_expression(self) -> t.Optional[exp.Expression]: 3791 return self._parse_alias(self._parse_conjunction()) 3792 3793 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3794 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3795 3796 def _parse_equality(self) -> t.Optional[exp.Expression]: 3797 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3798 3799 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3800 return self._parse_tokens(self._parse_range, self.COMPARISON) 3801 3802 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3803 this = this or self._parse_bitwise() 3804 negate = self._match(TokenType.NOT) 3805 3806 if self._match_set(self.RANGE_PARSERS): 3807 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3808 if not expression: 3809 return this 3810 3811 this = expression 3812 elif self._match(TokenType.ISNULL): 3813 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3814 3815 # Postgres supports ISNULL and NOTNULL for conditions. 3816 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3817 if self._match(TokenType.NOTNULL): 3818 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3819 this = self.expression(exp.Not, this=this) 3820 3821 if negate: 3822 this = self.expression(exp.Not, this=this) 3823 3824 if self._match(TokenType.IS): 3825 this = self._parse_is(this) 3826 3827 return this 3828 3829 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3830 index = self._index - 1 3831 negate = self._match(TokenType.NOT) 3832 3833 if self._match_text_seq("DISTINCT", "FROM"): 3834 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3835 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3836 3837 expression = self._parse_null() or self._parse_boolean() 3838 if not expression: 3839 self._retreat(index) 3840 return None 3841 3842 this = self.expression(exp.Is, this=this, expression=expression) 3843 return self.expression(exp.Not, this=this) if negate else this 3844 3845 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3846 unnest = self._parse_unnest(with_alias=False) 3847 if unnest: 3848 this = self.expression(exp.In, this=this, unnest=unnest) 3849 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3850 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3851 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3852 3853 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3854 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3855 else: 3856 this = self.expression(exp.In, this=this, expressions=expressions) 3857 3858 if matched_l_paren: 3859 self._match_r_paren(this) 3860 elif not self._match(TokenType.R_BRACKET, expression=this): 3861 self.raise_error("Expecting ]") 3862 else: 3863 this = self.expression(exp.In, this=this, field=self._parse_field()) 3864 3865 return this 3866 3867 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3868 low = self._parse_bitwise() 3869 self._match(TokenType.AND) 3870 high = self._parse_bitwise() 3871 return self.expression(exp.Between, this=this, low=low, high=high) 3872 3873 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3874 if not self._match(TokenType.ESCAPE): 3875 return this 3876 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3877 3878 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3879 index = self._index 3880 3881 if not self._match(TokenType.INTERVAL) and match_interval: 3882 return None 3883 3884 if self._match(TokenType.STRING, advance=False): 3885 this = self._parse_primary() 3886 else: 3887 this = self._parse_term() 3888 3889 if not this or ( 3890 isinstance(this, exp.Column) 3891 and not this.table 3892 and not this.this.quoted 3893 and this.name.upper() == "IS" 3894 ): 3895 self._retreat(index) 3896 return None 3897 3898 unit = self._parse_function() or ( 3899 not self._match(TokenType.ALIAS, advance=False) 3900 and self._parse_var(any_token=True, upper=True) 3901 ) 3902 3903 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3904 # each INTERVAL expression into this canonical form so it's easy to transpile 3905 if this and this.is_number: 3906 this = exp.Literal.string(this.name) 3907 elif this and this.is_string: 3908 parts = this.name.split() 3909 3910 if len(parts) == 2: 3911 if unit: 3912 # This is not actually a unit, it's something else (e.g. a "window side") 3913 unit = None 3914 self._retreat(self._index - 1) 3915 3916 this = exp.Literal.string(parts[0]) 3917 unit = self.expression(exp.Var, this=parts[1].upper()) 3918 3919 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3920 unit = self.expression( 3921 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3922 ) 3923 3924 return self.expression(exp.Interval, this=this, unit=unit) 3925 3926 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3927 this = self._parse_term() 3928 3929 while True: 3930 if self._match_set(self.BITWISE): 3931 this = self.expression( 3932 self.BITWISE[self._prev.token_type], 3933 this=this, 3934 expression=self._parse_term(), 3935 ) 3936 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3937 this = self.expression( 3938 exp.DPipe, 3939 this=this, 3940 expression=self._parse_term(), 3941 safe=not self.dialect.STRICT_STRING_CONCAT, 3942 ) 3943 elif self._match(TokenType.DQMARK): 3944 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3945 elif self._match_pair(TokenType.LT, TokenType.LT): 3946 this = self.expression( 3947 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3948 ) 3949 elif self._match_pair(TokenType.GT, TokenType.GT): 3950 this = self.expression( 3951 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3952 ) 3953 else: 3954 break 3955 3956 return this 3957 3958 def _parse_term(self) -> t.Optional[exp.Expression]: 3959 return self._parse_tokens(self._parse_factor, self.TERM) 3960 3961 def _parse_factor(self) -> t.Optional[exp.Expression]: 3962 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3963 this = parse_method() 3964 3965 while self._match_set(self.FACTOR): 3966 this = self.expression( 3967 self.FACTOR[self._prev.token_type], 3968 this=this, 3969 comments=self._prev_comments, 3970 expression=parse_method(), 3971 ) 3972 if isinstance(this, exp.Div): 3973 this.args["typed"] = self.dialect.TYPED_DIVISION 3974 this.args["safe"] = self.dialect.SAFE_DIVISION 3975 3976 return this 3977 3978 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3979 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3980 3981 def _parse_unary(self) -> t.Optional[exp.Expression]: 3982 if self._match_set(self.UNARY_PARSERS): 3983 return self.UNARY_PARSERS[self._prev.token_type](self) 3984 return self._parse_at_time_zone(self._parse_type()) 3985 3986 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3987 interval = parse_interval and self._parse_interval() 3988 if interval: 3989 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3990 while True: 3991 index = self._index 3992 self._match(TokenType.PLUS) 3993 3994 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3995 self._retreat(index) 3996 break 3997 3998 interval = self.expression( # type: ignore 3999 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4000 ) 4001 4002 return interval 4003 4004 index = self._index 4005 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4006 this = self._parse_column() 4007 4008 if data_type: 4009 if isinstance(this, exp.Literal): 4010 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4011 if parser: 4012 return parser(self, this, data_type) 4013 return self.expression(exp.Cast, this=this, to=data_type) 4014 if not data_type.expressions: 4015 self._retreat(index) 4016 return self._parse_column() 4017 return self._parse_column_ops(data_type) 4018 4019 return this and self._parse_column_ops(this) 4020 4021 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4022 this = self._parse_type() 4023 if not this: 4024 return None 4025 4026 if isinstance(this, exp.Column) and not this.table: 4027 this = exp.var(this.name.upper()) 4028 4029 return self.expression( 4030 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4031 ) 4032 4033 def _parse_types( 4034 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4035 ) -> t.Optional[exp.Expression]: 4036 index = self._index 4037 4038 prefix = self._match_text_seq("SYSUDTLIB", ".") 4039 4040 if not self._match_set(self.TYPE_TOKENS): 4041 identifier = allow_identifiers and self._parse_id_var( 4042 any_token=False, tokens=(TokenType.VAR,) 4043 ) 4044 if identifier: 4045 tokens = self.dialect.tokenize(identifier.name) 4046 4047 if len(tokens) != 1: 4048 self.raise_error("Unexpected identifier", self._prev) 4049 4050 if tokens[0].token_type in self.TYPE_TOKENS: 4051 self._prev = tokens[0] 4052 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4053 type_name = identifier.name 4054 4055 while self._match(TokenType.DOT): 4056 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4057 4058 return exp.DataType.build(type_name, udt=True) 4059 else: 4060 self._retreat(self._index - 1) 4061 return None 4062 else: 4063 return None 4064 4065 type_token = self._prev.token_type 4066 4067 if type_token == TokenType.PSEUDO_TYPE: 4068 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4069 4070 if type_token == TokenType.OBJECT_IDENTIFIER: 4071 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4072 4073 nested = type_token in self.NESTED_TYPE_TOKENS 4074 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4075 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4076 expressions = None 4077 maybe_func = False 4078 4079 if self._match(TokenType.L_PAREN): 4080 if is_struct: 4081 expressions = self._parse_csv(self._parse_struct_types) 4082 elif nested: 4083 expressions = self._parse_csv( 4084 lambda: self._parse_types( 4085 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4086 ) 4087 ) 4088 elif type_token in self.ENUM_TYPE_TOKENS: 4089 expressions = self._parse_csv(self._parse_equality) 4090 elif is_aggregate: 4091 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4092 any_token=False, tokens=(TokenType.VAR,) 4093 ) 4094 if not func_or_ident or not self._match(TokenType.COMMA): 4095 return None 4096 expressions = self._parse_csv( 4097 lambda: self._parse_types( 4098 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4099 ) 4100 ) 4101 expressions.insert(0, func_or_ident) 4102 else: 4103 expressions = self._parse_csv(self._parse_type_size) 4104 4105 if not expressions or not self._match(TokenType.R_PAREN): 4106 self._retreat(index) 4107 return None 4108 4109 maybe_func = True 4110 4111 this: t.Optional[exp.Expression] = None 4112 values: t.Optional[t.List[exp.Expression]] = None 4113 4114 if nested and self._match(TokenType.LT): 4115 if is_struct: 4116 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4117 else: 4118 expressions = self._parse_csv( 4119 lambda: self._parse_types( 4120 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4121 ) 4122 ) 4123 4124 if not self._match(TokenType.GT): 4125 self.raise_error("Expecting >") 4126 4127 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4128 values = self._parse_csv(self._parse_conjunction) 4129 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4130 4131 if type_token in self.TIMESTAMPS: 4132 if self._match_text_seq("WITH", "TIME", "ZONE"): 4133 maybe_func = False 4134 tz_type = ( 4135 exp.DataType.Type.TIMETZ 4136 if type_token in self.TIMES 4137 else exp.DataType.Type.TIMESTAMPTZ 4138 ) 4139 this = exp.DataType(this=tz_type, expressions=expressions) 4140 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4141 maybe_func = False 4142 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4143 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4144 maybe_func = False 4145 elif type_token == TokenType.INTERVAL: 4146 unit = self._parse_var(upper=True) 4147 if unit: 4148 if self._match_text_seq("TO"): 4149 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4150 4151 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4152 else: 4153 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4154 4155 if maybe_func and check_func: 4156 index2 = self._index 4157 peek = self._parse_string() 4158 4159 if not peek: 4160 self._retreat(index) 4161 return None 4162 4163 self._retreat(index2) 4164 4165 if not this: 4166 if self._match_text_seq("UNSIGNED"): 4167 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4168 if not unsigned_type_token: 4169 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4170 4171 type_token = unsigned_type_token or type_token 4172 4173 this = exp.DataType( 4174 this=exp.DataType.Type[type_token.value], 4175 expressions=expressions, 4176 nested=nested, 4177 values=values, 4178 prefix=prefix, 4179 ) 4180 4181 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4182 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4183 4184 return this 4185 4186 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4187 index = self._index 4188 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4189 self._match(TokenType.COLON) 4190 column_def = self._parse_column_def(this) 4191 4192 if type_required and ( 4193 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4194 ): 4195 self._retreat(index) 4196 return self._parse_types() 4197 4198 return column_def 4199 4200 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4201 if not self._match_text_seq("AT", "TIME", "ZONE"): 4202 return this 4203 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4204 4205 def _parse_column(self) -> t.Optional[exp.Expression]: 4206 this = self._parse_column_reference() 4207 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4208 4209 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4210 this = self._parse_field() 4211 if ( 4212 not this 4213 and self._match(TokenType.VALUES, advance=False) 4214 and self.VALUES_FOLLOWED_BY_PAREN 4215 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4216 ): 4217 this = self._parse_id_var() 4218 4219 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4220 4221 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4222 this = self._parse_bracket(this) 4223 4224 while self._match_set(self.COLUMN_OPERATORS): 4225 op_token = self._prev.token_type 4226 op = self.COLUMN_OPERATORS.get(op_token) 4227 4228 if op_token == TokenType.DCOLON: 4229 field = self._parse_types() 4230 if not field: 4231 self.raise_error("Expected type") 4232 elif op and self._curr: 4233 field = self._parse_column_reference() 4234 else: 4235 field = self._parse_field(any_token=True, anonymous_func=True) 4236 4237 if isinstance(field, exp.Func) and this: 4238 # bigquery allows function calls like x.y.count(...) 4239 # SAFE.SUBSTR(...) 4240 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4241 this = exp.replace_tree( 4242 this, 4243 lambda n: ( 4244 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4245 if n.table 4246 else n.this 4247 ) 4248 if isinstance(n, exp.Column) 4249 else n, 4250 ) 4251 4252 if op: 4253 this = op(self, this, field) 4254 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4255 this = self.expression( 4256 exp.Column, 4257 this=field, 4258 table=this.this, 4259 db=this.args.get("table"), 4260 catalog=this.args.get("db"), 4261 ) 4262 else: 4263 this = self.expression(exp.Dot, this=this, expression=field) 4264 this = self._parse_bracket(this) 4265 return this 4266 4267 def _parse_primary(self) -> t.Optional[exp.Expression]: 4268 if self._match_set(self.PRIMARY_PARSERS): 4269 token_type = self._prev.token_type 4270 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4271 4272 if token_type == TokenType.STRING: 4273 expressions = [primary] 4274 while self._match(TokenType.STRING): 4275 expressions.append(exp.Literal.string(self._prev.text)) 4276 4277 if len(expressions) > 1: 4278 return self.expression(exp.Concat, expressions=expressions) 4279 4280 return primary 4281 4282 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4283 return exp.Literal.number(f"0.{self._prev.text}") 4284 4285 if self._match(TokenType.L_PAREN): 4286 comments = self._prev_comments 4287 query = self._parse_select() 4288 4289 if query: 4290 expressions = [query] 4291 else: 4292 expressions = self._parse_expressions() 4293 4294 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4295 4296 if isinstance(this, exp.UNWRAPPED_QUERIES): 4297 this = self._parse_set_operations( 4298 self._parse_subquery(this=this, parse_alias=False) 4299 ) 4300 elif isinstance(this, exp.Subquery): 4301 this = self._parse_subquery( 4302 this=self._parse_set_operations(this), parse_alias=False 4303 ) 4304 elif len(expressions) > 1: 4305 this = self.expression(exp.Tuple, expressions=expressions) 4306 else: 4307 this = self.expression(exp.Paren, this=this) 4308 4309 if this: 4310 this.add_comments(comments) 4311 4312 self._match_r_paren(expression=this) 4313 return this 4314 4315 return None 4316 4317 def _parse_field( 4318 self, 4319 any_token: bool = False, 4320 tokens: t.Optional[t.Collection[TokenType]] = None, 4321 anonymous_func: bool = False, 4322 ) -> t.Optional[exp.Expression]: 4323 if anonymous_func: 4324 field = ( 4325 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4326 or self._parse_primary() 4327 ) 4328 else: 4329 field = self._parse_primary() or self._parse_function( 4330 anonymous=anonymous_func, any_token=any_token 4331 ) 4332 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4333 4334 def _parse_function( 4335 self, 4336 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4337 anonymous: bool = False, 4338 optional_parens: bool = True, 4339 any_token: bool = False, 4340 ) -> t.Optional[exp.Expression]: 4341 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4342 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4343 fn_syntax = False 4344 if ( 4345 self._match(TokenType.L_BRACE, advance=False) 4346 and self._next 4347 and self._next.text.upper() == "FN" 4348 ): 4349 self._advance(2) 4350 fn_syntax = True 4351 4352 func = self._parse_function_call( 4353 functions=functions, 4354 anonymous=anonymous, 4355 optional_parens=optional_parens, 4356 any_token=any_token, 4357 ) 4358 4359 if fn_syntax: 4360 self._match(TokenType.R_BRACE) 4361 4362 return func 4363 4364 def _parse_function_call( 4365 self, 4366 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4367 anonymous: bool = False, 4368 optional_parens: bool = True, 4369 any_token: bool = False, 4370 ) -> t.Optional[exp.Expression]: 4371 if not self._curr: 4372 return None 4373 4374 comments = self._curr.comments 4375 token_type = self._curr.token_type 4376 this = self._curr.text 4377 upper = this.upper() 4378 4379 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4380 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4381 self._advance() 4382 return self._parse_window(parser(self)) 4383 4384 if not self._next or self._next.token_type != TokenType.L_PAREN: 4385 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4386 self._advance() 4387 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4388 4389 return None 4390 4391 if any_token: 4392 if token_type in self.RESERVED_TOKENS: 4393 return None 4394 elif token_type not in self.FUNC_TOKENS: 4395 return None 4396 4397 self._advance(2) 4398 4399 parser = self.FUNCTION_PARSERS.get(upper) 4400 if parser and not anonymous: 4401 this = parser(self) 4402 else: 4403 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4404 4405 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4406 this = self.expression(subquery_predicate, this=self._parse_select()) 4407 self._match_r_paren() 4408 return this 4409 4410 if functions is None: 4411 functions = self.FUNCTIONS 4412 4413 function = functions.get(upper) 4414 4415 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4416 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4417 4418 if alias: 4419 args = self._kv_to_prop_eq(args) 4420 4421 if function and not anonymous: 4422 if "dialect" in function.__code__.co_varnames: 4423 func = function(args, dialect=self.dialect) 4424 else: 4425 func = function(args) 4426 4427 func = self.validate_expression(func, args) 4428 if not self.dialect.NORMALIZE_FUNCTIONS: 4429 func.meta["name"] = this 4430 4431 this = func 4432 else: 4433 if token_type == TokenType.IDENTIFIER: 4434 this = exp.Identifier(this=this, quoted=True) 4435 this = self.expression(exp.Anonymous, this=this, expressions=args) 4436 4437 if isinstance(this, exp.Expression): 4438 this.add_comments(comments) 4439 4440 self._match_r_paren(this) 4441 return self._parse_window(this) 4442 4443 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4444 transformed = [] 4445 4446 for e in expressions: 4447 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4448 if isinstance(e, exp.Alias): 4449 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4450 4451 if not isinstance(e, exp.PropertyEQ): 4452 e = self.expression( 4453 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4454 ) 4455 4456 if isinstance(e.this, exp.Column): 4457 e.this.replace(e.this.this) 4458 4459 transformed.append(e) 4460 4461 return transformed 4462 4463 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4464 return self._parse_column_def(self._parse_id_var()) 4465 4466 def _parse_user_defined_function( 4467 self, kind: t.Optional[TokenType] = None 4468 ) -> t.Optional[exp.Expression]: 4469 this = self._parse_id_var() 4470 4471 while self._match(TokenType.DOT): 4472 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4473 4474 if not self._match(TokenType.L_PAREN): 4475 return this 4476 4477 expressions = self._parse_csv(self._parse_function_parameter) 4478 self._match_r_paren() 4479 return self.expression( 4480 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4481 ) 4482 4483 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4484 literal = self._parse_primary() 4485 if literal: 4486 return self.expression(exp.Introducer, this=token.text, expression=literal) 4487 4488 return self.expression(exp.Identifier, this=token.text) 4489 4490 def _parse_session_parameter(self) -> exp.SessionParameter: 4491 kind = None 4492 this = self._parse_id_var() or self._parse_primary() 4493 4494 if this and self._match(TokenType.DOT): 4495 kind = this.name 4496 this = self._parse_var() or self._parse_primary() 4497 4498 return self.expression(exp.SessionParameter, this=this, kind=kind) 4499 4500 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4501 index = self._index 4502 4503 if self._match(TokenType.L_PAREN): 4504 expressions = t.cast( 4505 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4506 ) 4507 4508 if not self._match(TokenType.R_PAREN): 4509 self._retreat(index) 4510 else: 4511 expressions = [self._parse_id_var()] 4512 4513 if self._match_set(self.LAMBDAS): 4514 return self.LAMBDAS[self._prev.token_type](self, expressions) 4515 4516 self._retreat(index) 4517 4518 this: t.Optional[exp.Expression] 4519 4520 if self._match(TokenType.DISTINCT): 4521 this = self.expression( 4522 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4523 ) 4524 else: 4525 this = self._parse_select_or_expression(alias=alias) 4526 4527 return self._parse_limit( 4528 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4529 ) 4530 4531 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4532 index = self._index 4533 if not self._match(TokenType.L_PAREN): 4534 return this 4535 4536 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4537 # expr can be of both types 4538 if self._match_set(self.SELECT_START_TOKENS): 4539 self._retreat(index) 4540 return this 4541 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4542 self._match_r_paren() 4543 return self.expression(exp.Schema, this=this, expressions=args) 4544 4545 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4546 return self._parse_column_def(self._parse_field(any_token=True)) 4547 4548 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4549 # column defs are not really columns, they're identifiers 4550 if isinstance(this, exp.Column): 4551 this = this.this 4552 4553 kind = self._parse_types(schema=True) 4554 4555 if self._match_text_seq("FOR", "ORDINALITY"): 4556 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4557 4558 constraints: t.List[exp.Expression] = [] 4559 4560 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4561 ("ALIAS", "MATERIALIZED") 4562 ): 4563 persisted = self._prev.text.upper() == "MATERIALIZED" 4564 constraints.append( 4565 self.expression( 4566 exp.ComputedColumnConstraint, 4567 this=self._parse_conjunction(), 4568 persisted=persisted or self._match_text_seq("PERSISTED"), 4569 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4570 ) 4571 ) 4572 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4573 self._match(TokenType.ALIAS) 4574 constraints.append( 4575 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4576 ) 4577 4578 while True: 4579 constraint = self._parse_column_constraint() 4580 if not constraint: 4581 break 4582 constraints.append(constraint) 4583 4584 if not kind and not constraints: 4585 return this 4586 4587 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4588 4589 def _parse_auto_increment( 4590 self, 4591 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4592 start = None 4593 increment = None 4594 4595 if self._match(TokenType.L_PAREN, advance=False): 4596 args = self._parse_wrapped_csv(self._parse_bitwise) 4597 start = seq_get(args, 0) 4598 increment = seq_get(args, 1) 4599 elif self._match_text_seq("START"): 4600 start = self._parse_bitwise() 4601 self._match_text_seq("INCREMENT") 4602 increment = self._parse_bitwise() 4603 4604 if start and increment: 4605 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4606 4607 return exp.AutoIncrementColumnConstraint() 4608 4609 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4610 if not self._match_text_seq("REFRESH"): 4611 self._retreat(self._index - 1) 4612 return None 4613 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4614 4615 def _parse_compress(self) -> exp.CompressColumnConstraint: 4616 if self._match(TokenType.L_PAREN, advance=False): 4617 return self.expression( 4618 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4619 ) 4620 4621 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4622 4623 def _parse_generated_as_identity( 4624 self, 4625 ) -> ( 4626 exp.GeneratedAsIdentityColumnConstraint 4627 | exp.ComputedColumnConstraint 4628 | exp.GeneratedAsRowColumnConstraint 4629 ): 4630 if self._match_text_seq("BY", "DEFAULT"): 4631 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4632 this = self.expression( 4633 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4634 ) 4635 else: 4636 self._match_text_seq("ALWAYS") 4637 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4638 4639 self._match(TokenType.ALIAS) 4640 4641 if self._match_text_seq("ROW"): 4642 start = self._match_text_seq("START") 4643 if not start: 4644 self._match(TokenType.END) 4645 hidden = self._match_text_seq("HIDDEN") 4646 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4647 4648 identity = self._match_text_seq("IDENTITY") 4649 4650 if self._match(TokenType.L_PAREN): 4651 if self._match(TokenType.START_WITH): 4652 this.set("start", self._parse_bitwise()) 4653 if self._match_text_seq("INCREMENT", "BY"): 4654 this.set("increment", self._parse_bitwise()) 4655 if self._match_text_seq("MINVALUE"): 4656 this.set("minvalue", self._parse_bitwise()) 4657 if self._match_text_seq("MAXVALUE"): 4658 this.set("maxvalue", self._parse_bitwise()) 4659 4660 if self._match_text_seq("CYCLE"): 4661 this.set("cycle", True) 4662 elif self._match_text_seq("NO", "CYCLE"): 4663 this.set("cycle", False) 4664 4665 if not identity: 4666 this.set("expression", self._parse_bitwise()) 4667 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4668 args = self._parse_csv(self._parse_bitwise) 4669 this.set("start", seq_get(args, 0)) 4670 this.set("increment", seq_get(args, 1)) 4671 4672 self._match_r_paren() 4673 4674 return this 4675 4676 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4677 self._match_text_seq("LENGTH") 4678 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4679 4680 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4681 if self._match_text_seq("NULL"): 4682 return self.expression(exp.NotNullColumnConstraint) 4683 if self._match_text_seq("CASESPECIFIC"): 4684 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4685 if self._match_text_seq("FOR", "REPLICATION"): 4686 return self.expression(exp.NotForReplicationColumnConstraint) 4687 return None 4688 4689 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4690 if self._match(TokenType.CONSTRAINT): 4691 this = self._parse_id_var() 4692 else: 4693 this = None 4694 4695 if self._match_texts(self.CONSTRAINT_PARSERS): 4696 return self.expression( 4697 exp.ColumnConstraint, 4698 this=this, 4699 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4700 ) 4701 4702 return this 4703 4704 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4705 if not self._match(TokenType.CONSTRAINT): 4706 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4707 4708 return self.expression( 4709 exp.Constraint, 4710 this=self._parse_id_var(), 4711 expressions=self._parse_unnamed_constraints(), 4712 ) 4713 4714 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4715 constraints = [] 4716 while True: 4717 constraint = self._parse_unnamed_constraint() or self._parse_function() 4718 if not constraint: 4719 break 4720 constraints.append(constraint) 4721 4722 return constraints 4723 4724 def _parse_unnamed_constraint( 4725 self, constraints: t.Optional[t.Collection[str]] = None 4726 ) -> t.Optional[exp.Expression]: 4727 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4728 constraints or self.CONSTRAINT_PARSERS 4729 ): 4730 return None 4731 4732 constraint = self._prev.text.upper() 4733 if constraint not in self.CONSTRAINT_PARSERS: 4734 self.raise_error(f"No parser found for schema constraint {constraint}.") 4735 4736 return self.CONSTRAINT_PARSERS[constraint](self) 4737 4738 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4739 self._match_text_seq("KEY") 4740 return self.expression( 4741 exp.UniqueColumnConstraint, 4742 this=self._parse_schema(self._parse_id_var(any_token=False)), 4743 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4744 on_conflict=self._parse_on_conflict(), 4745 ) 4746 4747 def _parse_key_constraint_options(self) -> t.List[str]: 4748 options = [] 4749 while True: 4750 if not self._curr: 4751 break 4752 4753 if self._match(TokenType.ON): 4754 action = None 4755 on = self._advance_any() and self._prev.text 4756 4757 if self._match_text_seq("NO", "ACTION"): 4758 action = "NO ACTION" 4759 elif self._match_text_seq("CASCADE"): 4760 action = "CASCADE" 4761 elif self._match_text_seq("RESTRICT"): 4762 action = "RESTRICT" 4763 elif self._match_pair(TokenType.SET, TokenType.NULL): 4764 action = "SET NULL" 4765 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4766 action = "SET DEFAULT" 4767 else: 4768 self.raise_error("Invalid key constraint") 4769 4770 options.append(f"ON {on} {action}") 4771 elif self._match_text_seq("NOT", "ENFORCED"): 4772 options.append("NOT ENFORCED") 4773 elif self._match_text_seq("DEFERRABLE"): 4774 options.append("DEFERRABLE") 4775 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4776 options.append("INITIALLY DEFERRED") 4777 elif self._match_text_seq("NORELY"): 4778 options.append("NORELY") 4779 elif self._match_text_seq("MATCH", "FULL"): 4780 options.append("MATCH FULL") 4781 else: 4782 break 4783 4784 return options 4785 4786 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4787 if match and not self._match(TokenType.REFERENCES): 4788 return None 4789 4790 expressions = None 4791 this = self._parse_table(schema=True) 4792 options = self._parse_key_constraint_options() 4793 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4794 4795 def _parse_foreign_key(self) -> exp.ForeignKey: 4796 expressions = self._parse_wrapped_id_vars() 4797 reference = self._parse_references() 4798 options = {} 4799 4800 while self._match(TokenType.ON): 4801 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4802 self.raise_error("Expected DELETE or UPDATE") 4803 4804 kind = self._prev.text.lower() 4805 4806 if self._match_text_seq("NO", "ACTION"): 4807 action = "NO ACTION" 4808 elif self._match(TokenType.SET): 4809 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4810 action = "SET " + self._prev.text.upper() 4811 else: 4812 self._advance() 4813 action = self._prev.text.upper() 4814 4815 options[kind] = action 4816 4817 return self.expression( 4818 exp.ForeignKey, 4819 expressions=expressions, 4820 reference=reference, 4821 **options, # type: ignore 4822 ) 4823 4824 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4825 return self._parse_field() 4826 4827 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4828 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4829 self._retreat(self._index - 1) 4830 return None 4831 4832 id_vars = self._parse_wrapped_id_vars() 4833 return self.expression( 4834 exp.PeriodForSystemTimeConstraint, 4835 this=seq_get(id_vars, 0), 4836 expression=seq_get(id_vars, 1), 4837 ) 4838 4839 def _parse_primary_key( 4840 self, wrapped_optional: bool = False, in_props: bool = False 4841 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4842 desc = ( 4843 self._match_set((TokenType.ASC, TokenType.DESC)) 4844 and self._prev.token_type == TokenType.DESC 4845 ) 4846 4847 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4848 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4849 4850 expressions = self._parse_wrapped_csv( 4851 self._parse_primary_key_part, optional=wrapped_optional 4852 ) 4853 options = self._parse_key_constraint_options() 4854 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4855 4856 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4857 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4858 4859 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4860 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4861 return this 4862 4863 bracket_kind = self._prev.token_type 4864 expressions = self._parse_csv( 4865 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4866 ) 4867 4868 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4869 self.raise_error("Expected ]") 4870 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4871 self.raise_error("Expected }") 4872 4873 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4874 if bracket_kind == TokenType.L_BRACE: 4875 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4876 elif not this or this.name.upper() == "ARRAY": 4877 this = self.expression(exp.Array, expressions=expressions) 4878 else: 4879 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4880 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4881 4882 self._add_comments(this) 4883 return self._parse_bracket(this) 4884 4885 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4886 if self._match(TokenType.COLON): 4887 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4888 return this 4889 4890 def _parse_case(self) -> t.Optional[exp.Expression]: 4891 ifs = [] 4892 default = None 4893 4894 comments = self._prev_comments 4895 expression = self._parse_conjunction() 4896 4897 while self._match(TokenType.WHEN): 4898 this = self._parse_conjunction() 4899 self._match(TokenType.THEN) 4900 then = self._parse_conjunction() 4901 ifs.append(self.expression(exp.If, this=this, true=then)) 4902 4903 if self._match(TokenType.ELSE): 4904 default = self._parse_conjunction() 4905 4906 if not self._match(TokenType.END): 4907 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4908 default = exp.column("interval") 4909 else: 4910 self.raise_error("Expected END after CASE", self._prev) 4911 4912 return self.expression( 4913 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4914 ) 4915 4916 def _parse_if(self) -> t.Optional[exp.Expression]: 4917 if self._match(TokenType.L_PAREN): 4918 args = self._parse_csv(self._parse_conjunction) 4919 this = self.validate_expression(exp.If.from_arg_list(args), args) 4920 self._match_r_paren() 4921 else: 4922 index = self._index - 1 4923 4924 if self.NO_PAREN_IF_COMMANDS and index == 0: 4925 return self._parse_as_command(self._prev) 4926 4927 condition = self._parse_conjunction() 4928 4929 if not condition: 4930 self._retreat(index) 4931 return None 4932 4933 self._match(TokenType.THEN) 4934 true = self._parse_conjunction() 4935 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4936 self._match(TokenType.END) 4937 this = self.expression(exp.If, this=condition, true=true, false=false) 4938 4939 return this 4940 4941 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4942 if not self._match_text_seq("VALUE", "FOR"): 4943 self._retreat(self._index - 1) 4944 return None 4945 4946 return self.expression( 4947 exp.NextValueFor, 4948 this=self._parse_column(), 4949 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4950 ) 4951 4952 def _parse_extract(self) -> exp.Extract: 4953 this = self._parse_function() or self._parse_var() or self._parse_type() 4954 4955 if self._match(TokenType.FROM): 4956 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4957 4958 if not self._match(TokenType.COMMA): 4959 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4960 4961 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4962 4963 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4964 this = self._parse_conjunction() 4965 4966 if not self._match(TokenType.ALIAS): 4967 if self._match(TokenType.COMMA): 4968 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4969 4970 self.raise_error("Expected AS after CAST") 4971 4972 fmt = None 4973 to = self._parse_types() 4974 4975 if self._match(TokenType.FORMAT): 4976 fmt_string = self._parse_string() 4977 fmt = self._parse_at_time_zone(fmt_string) 4978 4979 if not to: 4980 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4981 if to.this in exp.DataType.TEMPORAL_TYPES: 4982 this = self.expression( 4983 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4984 this=this, 4985 format=exp.Literal.string( 4986 format_time( 4987 fmt_string.this if fmt_string else "", 4988 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4989 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4990 ) 4991 ), 4992 ) 4993 4994 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4995 this.set("zone", fmt.args["zone"]) 4996 return this 4997 elif not to: 4998 self.raise_error("Expected TYPE after CAST") 4999 elif isinstance(to, exp.Identifier): 5000 to = exp.DataType.build(to.name, udt=True) 5001 elif to.this == exp.DataType.Type.CHAR: 5002 if self._match(TokenType.CHARACTER_SET): 5003 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5004 5005 return self.expression( 5006 exp.Cast if strict else exp.TryCast, 5007 this=this, 5008 to=to, 5009 format=fmt, 5010 safe=safe, 5011 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5012 ) 5013 5014 def _parse_string_agg(self) -> exp.Expression: 5015 if self._match(TokenType.DISTINCT): 5016 args: t.List[t.Optional[exp.Expression]] = [ 5017 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5018 ] 5019 if self._match(TokenType.COMMA): 5020 args.extend(self._parse_csv(self._parse_conjunction)) 5021 else: 5022 args = self._parse_csv(self._parse_conjunction) # type: ignore 5023 5024 index = self._index 5025 if not self._match(TokenType.R_PAREN) and args: 5026 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5027 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5028 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5029 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5030 5031 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5032 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5033 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5034 if not self._match_text_seq("WITHIN", "GROUP"): 5035 self._retreat(index) 5036 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5037 5038 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5039 order = self._parse_order(this=seq_get(args, 0)) 5040 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5041 5042 def _parse_convert( 5043 self, strict: bool, safe: t.Optional[bool] = None 5044 ) -> t.Optional[exp.Expression]: 5045 this = self._parse_bitwise() 5046 5047 if self._match(TokenType.USING): 5048 to: t.Optional[exp.Expression] = self.expression( 5049 exp.CharacterSet, this=self._parse_var() 5050 ) 5051 elif self._match(TokenType.COMMA): 5052 to = self._parse_types() 5053 else: 5054 to = None 5055 5056 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5057 5058 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5059 """ 5060 There are generally two variants of the DECODE function: 5061 5062 - DECODE(bin, charset) 5063 - DECODE(expression, search, result [, search, result] ... [, default]) 5064 5065 The second variant will always be parsed into a CASE expression. Note that NULL 5066 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5067 instead of relying on pattern matching. 5068 """ 5069 args = self._parse_csv(self._parse_conjunction) 5070 5071 if len(args) < 3: 5072 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5073 5074 expression, *expressions = args 5075 if not expression: 5076 return None 5077 5078 ifs = [] 5079 for search, result in zip(expressions[::2], expressions[1::2]): 5080 if not search or not result: 5081 return None 5082 5083 if isinstance(search, exp.Literal): 5084 ifs.append( 5085 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5086 ) 5087 elif isinstance(search, exp.Null): 5088 ifs.append( 5089 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5090 ) 5091 else: 5092 cond = exp.or_( 5093 exp.EQ(this=expression.copy(), expression=search), 5094 exp.and_( 5095 exp.Is(this=expression.copy(), expression=exp.Null()), 5096 exp.Is(this=search.copy(), expression=exp.Null()), 5097 copy=False, 5098 ), 5099 copy=False, 5100 ) 5101 ifs.append(exp.If(this=cond, true=result)) 5102 5103 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5104 5105 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5106 self._match_text_seq("KEY") 5107 key = self._parse_column() 5108 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5109 self._match_text_seq("VALUE") 5110 value = self._parse_bitwise() 5111 5112 if not key and not value: 5113 return None 5114 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5115 5116 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5117 if not this or not self._match_text_seq("FORMAT", "JSON"): 5118 return this 5119 5120 return self.expression(exp.FormatJson, this=this) 5121 5122 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5123 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5124 for value in values: 5125 if self._match_text_seq(value, "ON", on): 5126 return f"{value} ON {on}" 5127 5128 return None 5129 5130 @t.overload 5131 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5132 5133 @t.overload 5134 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5135 5136 def _parse_json_object(self, agg=False): 5137 star = self._parse_star() 5138 expressions = ( 5139 [star] 5140 if star 5141 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5142 ) 5143 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5144 5145 unique_keys = None 5146 if self._match_text_seq("WITH", "UNIQUE"): 5147 unique_keys = True 5148 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5149 unique_keys = False 5150 5151 self._match_text_seq("KEYS") 5152 5153 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5154 self._parse_type() 5155 ) 5156 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5157 5158 return self.expression( 5159 exp.JSONObjectAgg if agg else exp.JSONObject, 5160 expressions=expressions, 5161 null_handling=null_handling, 5162 unique_keys=unique_keys, 5163 return_type=return_type, 5164 encoding=encoding, 5165 ) 5166 5167 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5168 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5169 if not self._match_text_seq("NESTED"): 5170 this = self._parse_id_var() 5171 kind = self._parse_types(allow_identifiers=False) 5172 nested = None 5173 else: 5174 this = None 5175 kind = None 5176 nested = True 5177 5178 path = self._match_text_seq("PATH") and self._parse_string() 5179 nested_schema = nested and self._parse_json_schema() 5180 5181 return self.expression( 5182 exp.JSONColumnDef, 5183 this=this, 5184 kind=kind, 5185 path=path, 5186 nested_schema=nested_schema, 5187 ) 5188 5189 def _parse_json_schema(self) -> exp.JSONSchema: 5190 self._match_text_seq("COLUMNS") 5191 return self.expression( 5192 exp.JSONSchema, 5193 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5194 ) 5195 5196 def _parse_json_table(self) -> exp.JSONTable: 5197 this = self._parse_format_json(self._parse_bitwise()) 5198 path = self._match(TokenType.COMMA) and self._parse_string() 5199 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5200 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5201 schema = self._parse_json_schema() 5202 5203 return exp.JSONTable( 5204 this=this, 5205 schema=schema, 5206 path=path, 5207 error_handling=error_handling, 5208 empty_handling=empty_handling, 5209 ) 5210 5211 def _parse_match_against(self) -> exp.MatchAgainst: 5212 expressions = self._parse_csv(self._parse_column) 5213 5214 self._match_text_seq(")", "AGAINST", "(") 5215 5216 this = self._parse_string() 5217 5218 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5219 modifier = "IN NATURAL LANGUAGE MODE" 5220 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5221 modifier = f"{modifier} WITH QUERY EXPANSION" 5222 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5223 modifier = "IN BOOLEAN MODE" 5224 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5225 modifier = "WITH QUERY EXPANSION" 5226 else: 5227 modifier = None 5228 5229 return self.expression( 5230 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5231 ) 5232 5233 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5234 def _parse_open_json(self) -> exp.OpenJSON: 5235 this = self._parse_bitwise() 5236 path = self._match(TokenType.COMMA) and self._parse_string() 5237 5238 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5239 this = self._parse_field(any_token=True) 5240 kind = self._parse_types() 5241 path = self._parse_string() 5242 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5243 5244 return self.expression( 5245 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5246 ) 5247 5248 expressions = None 5249 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5250 self._match_l_paren() 5251 expressions = self._parse_csv(_parse_open_json_column_def) 5252 5253 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5254 5255 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5256 args = self._parse_csv(self._parse_bitwise) 5257 5258 if self._match(TokenType.IN): 5259 return self.expression( 5260 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5261 ) 5262 5263 if haystack_first: 5264 haystack = seq_get(args, 0) 5265 needle = seq_get(args, 1) 5266 else: 5267 needle = seq_get(args, 0) 5268 haystack = seq_get(args, 1) 5269 5270 return self.expression( 5271 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5272 ) 5273 5274 def _parse_predict(self) -> exp.Predict: 5275 self._match_text_seq("MODEL") 5276 this = self._parse_table() 5277 5278 self._match(TokenType.COMMA) 5279 self._match_text_seq("TABLE") 5280 5281 return self.expression( 5282 exp.Predict, 5283 this=this, 5284 expression=self._parse_table(), 5285 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5286 ) 5287 5288 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5289 args = self._parse_csv(self._parse_table) 5290 return exp.JoinHint(this=func_name.upper(), expressions=args) 5291 5292 def _parse_substring(self) -> exp.Substring: 5293 # Postgres supports the form: substring(string [from int] [for int]) 5294 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5295 5296 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5297 5298 if self._match(TokenType.FROM): 5299 args.append(self._parse_bitwise()) 5300 if self._match(TokenType.FOR): 5301 args.append(self._parse_bitwise()) 5302 5303 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5304 5305 def _parse_trim(self) -> exp.Trim: 5306 # https://www.w3resource.com/sql/character-functions/trim.php 5307 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5308 5309 position = None 5310 collation = None 5311 expression = None 5312 5313 if self._match_texts(self.TRIM_TYPES): 5314 position = self._prev.text.upper() 5315 5316 this = self._parse_bitwise() 5317 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5318 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5319 expression = self._parse_bitwise() 5320 5321 if invert_order: 5322 this, expression = expression, this 5323 5324 if self._match(TokenType.COLLATE): 5325 collation = self._parse_bitwise() 5326 5327 return self.expression( 5328 exp.Trim, this=this, position=position, expression=expression, collation=collation 5329 ) 5330 5331 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5332 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5333 5334 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5335 return self._parse_window(self._parse_id_var(), alias=True) 5336 5337 def _parse_respect_or_ignore_nulls( 5338 self, this: t.Optional[exp.Expression] 5339 ) -> t.Optional[exp.Expression]: 5340 if self._match_text_seq("IGNORE", "NULLS"): 5341 return self.expression(exp.IgnoreNulls, this=this) 5342 if self._match_text_seq("RESPECT", "NULLS"): 5343 return self.expression(exp.RespectNulls, this=this) 5344 return this 5345 5346 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5347 if self._match(TokenType.HAVING): 5348 self._match_texts(("MAX", "MIN")) 5349 max = self._prev.text.upper() != "MIN" 5350 return self.expression( 5351 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5352 ) 5353 5354 return this 5355 5356 def _parse_window( 5357 self, this: t.Optional[exp.Expression], alias: bool = False 5358 ) -> t.Optional[exp.Expression]: 5359 func = this 5360 comments = func.comments if isinstance(func, exp.Expression) else None 5361 5362 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5363 self._match(TokenType.WHERE) 5364 this = self.expression( 5365 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5366 ) 5367 self._match_r_paren() 5368 5369 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5370 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5371 if self._match_text_seq("WITHIN", "GROUP"): 5372 order = self._parse_wrapped(self._parse_order) 5373 this = self.expression(exp.WithinGroup, this=this, expression=order) 5374 5375 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5376 # Some dialects choose to implement and some do not. 5377 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5378 5379 # There is some code above in _parse_lambda that handles 5380 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5381 5382 # The below changes handle 5383 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5384 5385 # Oracle allows both formats 5386 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5387 # and Snowflake chose to do the same for familiarity 5388 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5389 if isinstance(this, exp.AggFunc): 5390 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5391 5392 if ignore_respect and ignore_respect is not this: 5393 ignore_respect.replace(ignore_respect.this) 5394 this = self.expression(ignore_respect.__class__, this=this) 5395 5396 this = self._parse_respect_or_ignore_nulls(this) 5397 5398 # bigquery select from window x AS (partition by ...) 5399 if alias: 5400 over = None 5401 self._match(TokenType.ALIAS) 5402 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5403 return this 5404 else: 5405 over = self._prev.text.upper() 5406 5407 if comments: 5408 func.comments = None # type: ignore 5409 5410 if not self._match(TokenType.L_PAREN): 5411 return self.expression( 5412 exp.Window, 5413 comments=comments, 5414 this=this, 5415 alias=self._parse_id_var(False), 5416 over=over, 5417 ) 5418 5419 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5420 5421 first = self._match(TokenType.FIRST) 5422 if self._match_text_seq("LAST"): 5423 first = False 5424 5425 partition, order = self._parse_partition_and_order() 5426 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5427 5428 if kind: 5429 self._match(TokenType.BETWEEN) 5430 start = self._parse_window_spec() 5431 self._match(TokenType.AND) 5432 end = self._parse_window_spec() 5433 5434 spec = self.expression( 5435 exp.WindowSpec, 5436 kind=kind, 5437 start=start["value"], 5438 start_side=start["side"], 5439 end=end["value"], 5440 end_side=end["side"], 5441 ) 5442 else: 5443 spec = None 5444 5445 self._match_r_paren() 5446 5447 window = self.expression( 5448 exp.Window, 5449 comments=comments, 5450 this=this, 5451 partition_by=partition, 5452 order=order, 5453 spec=spec, 5454 alias=window_alias, 5455 over=over, 5456 first=first, 5457 ) 5458 5459 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5460 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5461 return self._parse_window(window, alias=alias) 5462 5463 return window 5464 5465 def _parse_partition_and_order( 5466 self, 5467 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5468 return self._parse_partition_by(), self._parse_order() 5469 5470 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5471 self._match(TokenType.BETWEEN) 5472 5473 return { 5474 "value": ( 5475 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5476 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5477 or self._parse_bitwise() 5478 ), 5479 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5480 } 5481 5482 def _parse_alias( 5483 self, this: t.Optional[exp.Expression], explicit: bool = False 5484 ) -> t.Optional[exp.Expression]: 5485 any_token = self._match(TokenType.ALIAS) 5486 comments = self._prev_comments 5487 5488 if explicit and not any_token: 5489 return this 5490 5491 if self._match(TokenType.L_PAREN): 5492 aliases = self.expression( 5493 exp.Aliases, 5494 comments=comments, 5495 this=this, 5496 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5497 ) 5498 self._match_r_paren(aliases) 5499 return aliases 5500 5501 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5502 self.STRING_ALIASES and self._parse_string_as_identifier() 5503 ) 5504 5505 if alias: 5506 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5507 column = this.this 5508 5509 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5510 if not this.comments and column and column.comments: 5511 this.comments = column.comments 5512 column.comments = None 5513 5514 return this 5515 5516 def _parse_id_var( 5517 self, 5518 any_token: bool = True, 5519 tokens: t.Optional[t.Collection[TokenType]] = None, 5520 ) -> t.Optional[exp.Expression]: 5521 identifier = self._parse_identifier() 5522 if identifier: 5523 return identifier 5524 5525 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5526 quoted = self._prev.token_type == TokenType.STRING 5527 return exp.Identifier(this=self._prev.text, quoted=quoted) 5528 5529 return None 5530 5531 def _parse_string(self) -> t.Optional[exp.Expression]: 5532 if self._match_set(self.STRING_PARSERS): 5533 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5534 return self._parse_placeholder() 5535 5536 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5537 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5538 5539 def _parse_number(self) -> t.Optional[exp.Expression]: 5540 if self._match_set(self.NUMERIC_PARSERS): 5541 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5542 return self._parse_placeholder() 5543 5544 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5545 if self._match(TokenType.IDENTIFIER): 5546 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5547 return self._parse_placeholder() 5548 5549 def _parse_var( 5550 self, 5551 any_token: bool = False, 5552 tokens: t.Optional[t.Collection[TokenType]] = None, 5553 upper: bool = False, 5554 ) -> t.Optional[exp.Expression]: 5555 if ( 5556 (any_token and self._advance_any()) 5557 or self._match(TokenType.VAR) 5558 or (self._match_set(tokens) if tokens else False) 5559 ): 5560 return self.expression( 5561 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5562 ) 5563 return self._parse_placeholder() 5564 5565 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5566 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5567 self._advance() 5568 return self._prev 5569 return None 5570 5571 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5572 return self._parse_var() or self._parse_string() 5573 5574 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5575 return self._parse_primary() or self._parse_var(any_token=True) 5576 5577 def _parse_null(self) -> t.Optional[exp.Expression]: 5578 if self._match_set(self.NULL_TOKENS): 5579 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5580 return self._parse_placeholder() 5581 5582 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5583 if self._match(TokenType.TRUE): 5584 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5585 if self._match(TokenType.FALSE): 5586 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5587 return self._parse_placeholder() 5588 5589 def _parse_star(self) -> t.Optional[exp.Expression]: 5590 if self._match(TokenType.STAR): 5591 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5592 return self._parse_placeholder() 5593 5594 def _parse_parameter(self) -> exp.Parameter: 5595 self._match(TokenType.L_BRACE) 5596 this = self._parse_identifier() or self._parse_primary_or_var() 5597 expression = self._match(TokenType.COLON) and ( 5598 self._parse_identifier() or self._parse_primary_or_var() 5599 ) 5600 self._match(TokenType.R_BRACE) 5601 return self.expression(exp.Parameter, this=this, expression=expression) 5602 5603 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5604 if self._match_set(self.PLACEHOLDER_PARSERS): 5605 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5606 if placeholder: 5607 return placeholder 5608 self._advance(-1) 5609 return None 5610 5611 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5612 if not self._match(TokenType.EXCEPT): 5613 return None 5614 if self._match(TokenType.L_PAREN, advance=False): 5615 return self._parse_wrapped_csv(self._parse_column) 5616 5617 except_column = self._parse_column() 5618 return [except_column] if except_column else None 5619 5620 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5621 if not self._match(TokenType.REPLACE): 5622 return None 5623 if self._match(TokenType.L_PAREN, advance=False): 5624 return self._parse_wrapped_csv(self._parse_expression) 5625 5626 replace_expression = self._parse_expression() 5627 return [replace_expression] if replace_expression else None 5628 5629 def _parse_csv( 5630 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5631 ) -> t.List[exp.Expression]: 5632 parse_result = parse_method() 5633 items = [parse_result] if parse_result is not None else [] 5634 5635 while self._match(sep): 5636 self._add_comments(parse_result) 5637 parse_result = parse_method() 5638 if parse_result is not None: 5639 items.append(parse_result) 5640 5641 return items 5642 5643 def _parse_tokens( 5644 self, parse_method: t.Callable, expressions: t.Dict 5645 ) -> t.Optional[exp.Expression]: 5646 this = parse_method() 5647 5648 while self._match_set(expressions): 5649 this = self.expression( 5650 expressions[self._prev.token_type], 5651 this=this, 5652 comments=self._prev_comments, 5653 expression=parse_method(), 5654 ) 5655 5656 return this 5657 5658 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5659 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5660 5661 def _parse_wrapped_csv( 5662 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5663 ) -> t.List[exp.Expression]: 5664 return self._parse_wrapped( 5665 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5666 ) 5667 5668 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5669 wrapped = self._match(TokenType.L_PAREN) 5670 if not wrapped and not optional: 5671 self.raise_error("Expecting (") 5672 parse_result = parse_method() 5673 if wrapped: 5674 self._match_r_paren() 5675 return parse_result 5676 5677 def _parse_expressions(self) -> t.List[exp.Expression]: 5678 return self._parse_csv(self._parse_expression) 5679 5680 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5681 return self._parse_select() or self._parse_set_operations( 5682 self._parse_expression() if alias else self._parse_conjunction() 5683 ) 5684 5685 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5686 return self._parse_query_modifiers( 5687 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5688 ) 5689 5690 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5691 this = None 5692 if self._match_texts(self.TRANSACTION_KIND): 5693 this = self._prev.text 5694 5695 self._match_texts(("TRANSACTION", "WORK")) 5696 5697 modes = [] 5698 while True: 5699 mode = [] 5700 while self._match(TokenType.VAR): 5701 mode.append(self._prev.text) 5702 5703 if mode: 5704 modes.append(" ".join(mode)) 5705 if not self._match(TokenType.COMMA): 5706 break 5707 5708 return self.expression(exp.Transaction, this=this, modes=modes) 5709 5710 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5711 chain = None 5712 savepoint = None 5713 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5714 5715 self._match_texts(("TRANSACTION", "WORK")) 5716 5717 if self._match_text_seq("TO"): 5718 self._match_text_seq("SAVEPOINT") 5719 savepoint = self._parse_id_var() 5720 5721 if self._match(TokenType.AND): 5722 chain = not self._match_text_seq("NO") 5723 self._match_text_seq("CHAIN") 5724 5725 if is_rollback: 5726 return self.expression(exp.Rollback, savepoint=savepoint) 5727 5728 return self.expression(exp.Commit, chain=chain) 5729 5730 def _parse_refresh(self) -> exp.Refresh: 5731 self._match(TokenType.TABLE) 5732 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5733 5734 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5735 if not self._match_text_seq("ADD"): 5736 return None 5737 5738 self._match(TokenType.COLUMN) 5739 exists_column = self._parse_exists(not_=True) 5740 expression = self._parse_field_def() 5741 5742 if expression: 5743 expression.set("exists", exists_column) 5744 5745 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5746 if self._match_texts(("FIRST", "AFTER")): 5747 position = self._prev.text 5748 column_position = self.expression( 5749 exp.ColumnPosition, this=self._parse_column(), position=position 5750 ) 5751 expression.set("position", column_position) 5752 5753 return expression 5754 5755 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5756 drop = self._match(TokenType.DROP) and self._parse_drop() 5757 if drop and not isinstance(drop, exp.Command): 5758 drop.set("kind", drop.args.get("kind", "COLUMN")) 5759 return drop 5760 5761 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5762 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5763 return self.expression( 5764 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5765 ) 5766 5767 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5768 index = self._index - 1 5769 5770 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5771 return self._parse_csv( 5772 lambda: self.expression( 5773 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5774 ) 5775 ) 5776 5777 self._retreat(index) 5778 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5779 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5780 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5781 5782 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5783 self._match(TokenType.COLUMN) 5784 column = self._parse_field(any_token=True) 5785 5786 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5787 return self.expression(exp.AlterColumn, this=column, drop=True) 5788 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5789 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5790 if self._match(TokenType.COMMENT): 5791 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5792 5793 self._match_text_seq("SET", "DATA") 5794 self._match_text_seq("TYPE") 5795 return self.expression( 5796 exp.AlterColumn, 5797 this=column, 5798 dtype=self._parse_types(), 5799 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5800 using=self._match(TokenType.USING) and self._parse_conjunction(), 5801 ) 5802 5803 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5804 index = self._index - 1 5805 5806 partition_exists = self._parse_exists() 5807 if self._match(TokenType.PARTITION, advance=False): 5808 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5809 5810 self._retreat(index) 5811 return self._parse_csv(self._parse_drop_column) 5812 5813 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5814 if self._match(TokenType.COLUMN): 5815 exists = self._parse_exists() 5816 old_column = self._parse_column() 5817 to = self._match_text_seq("TO") 5818 new_column = self._parse_column() 5819 5820 if old_column is None or to is None or new_column is None: 5821 return None 5822 5823 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5824 5825 self._match_text_seq("TO") 5826 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5827 5828 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5829 start = self._prev 5830 5831 if not self._match(TokenType.TABLE): 5832 return self._parse_as_command(start) 5833 5834 exists = self._parse_exists() 5835 only = self._match_text_seq("ONLY") 5836 this = self._parse_table(schema=True) 5837 5838 if self._next: 5839 self._advance() 5840 5841 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5842 if parser: 5843 actions = ensure_list(parser(self)) 5844 options = self._parse_csv(self._parse_property) 5845 5846 if not self._curr and actions: 5847 return self.expression( 5848 exp.AlterTable, 5849 this=this, 5850 exists=exists, 5851 actions=actions, 5852 only=only, 5853 options=options, 5854 ) 5855 5856 return self._parse_as_command(start) 5857 5858 def _parse_merge(self) -> exp.Merge: 5859 self._match(TokenType.INTO) 5860 target = self._parse_table() 5861 5862 if target and self._match(TokenType.ALIAS, advance=False): 5863 target.set("alias", self._parse_table_alias()) 5864 5865 self._match(TokenType.USING) 5866 using = self._parse_table() 5867 5868 self._match(TokenType.ON) 5869 on = self._parse_conjunction() 5870 5871 return self.expression( 5872 exp.Merge, 5873 this=target, 5874 using=using, 5875 on=on, 5876 expressions=self._parse_when_matched(), 5877 ) 5878 5879 def _parse_when_matched(self) -> t.List[exp.When]: 5880 whens = [] 5881 5882 while self._match(TokenType.WHEN): 5883 matched = not self._match(TokenType.NOT) 5884 self._match_text_seq("MATCHED") 5885 source = ( 5886 False 5887 if self._match_text_seq("BY", "TARGET") 5888 else self._match_text_seq("BY", "SOURCE") 5889 ) 5890 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5891 5892 self._match(TokenType.THEN) 5893 5894 if self._match(TokenType.INSERT): 5895 _this = self._parse_star() 5896 if _this: 5897 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5898 else: 5899 then = self.expression( 5900 exp.Insert, 5901 this=self._parse_value(), 5902 expression=self._match_text_seq("VALUES") and self._parse_value(), 5903 ) 5904 elif self._match(TokenType.UPDATE): 5905 expressions = self._parse_star() 5906 if expressions: 5907 then = self.expression(exp.Update, expressions=expressions) 5908 else: 5909 then = self.expression( 5910 exp.Update, 5911 expressions=self._match(TokenType.SET) 5912 and self._parse_csv(self._parse_equality), 5913 ) 5914 elif self._match(TokenType.DELETE): 5915 then = self.expression(exp.Var, this=self._prev.text) 5916 else: 5917 then = None 5918 5919 whens.append( 5920 self.expression( 5921 exp.When, 5922 matched=matched, 5923 source=source, 5924 condition=condition, 5925 then=then, 5926 ) 5927 ) 5928 return whens 5929 5930 def _parse_show(self) -> t.Optional[exp.Expression]: 5931 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5932 if parser: 5933 return parser(self) 5934 return self._parse_as_command(self._prev) 5935 5936 def _parse_set_item_assignment( 5937 self, kind: t.Optional[str] = None 5938 ) -> t.Optional[exp.Expression]: 5939 index = self._index 5940 5941 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5942 return self._parse_set_transaction(global_=kind == "GLOBAL") 5943 5944 left = self._parse_primary() or self._parse_id_var() 5945 assignment_delimiter = self._match_texts(("=", "TO")) 5946 5947 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5948 self._retreat(index) 5949 return None 5950 5951 right = self._parse_statement() or self._parse_id_var() 5952 this = self.expression(exp.EQ, this=left, expression=right) 5953 5954 return self.expression(exp.SetItem, this=this, kind=kind) 5955 5956 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5957 self._match_text_seq("TRANSACTION") 5958 characteristics = self._parse_csv( 5959 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5960 ) 5961 return self.expression( 5962 exp.SetItem, 5963 expressions=characteristics, 5964 kind="TRANSACTION", 5965 **{"global": global_}, # type: ignore 5966 ) 5967 5968 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5969 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5970 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5971 5972 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5973 index = self._index 5974 set_ = self.expression( 5975 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5976 ) 5977 5978 if self._curr: 5979 self._retreat(index) 5980 return self._parse_as_command(self._prev) 5981 5982 return set_ 5983 5984 def _parse_var_from_options( 5985 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5986 ) -> t.Optional[exp.Var]: 5987 start = self._curr 5988 if not start: 5989 return None 5990 5991 option = start.text.upper() 5992 continuations = options.get(option) 5993 5994 index = self._index 5995 self._advance() 5996 for keywords in continuations or []: 5997 if isinstance(keywords, str): 5998 keywords = (keywords,) 5999 6000 if self._match_text_seq(*keywords): 6001 option = f"{option} {' '.join(keywords)}" 6002 break 6003 else: 6004 if continuations or continuations is None: 6005 if raise_unmatched: 6006 self.raise_error(f"Unknown option {option}") 6007 6008 self._retreat(index) 6009 return None 6010 6011 return exp.var(option) 6012 6013 def _parse_as_command(self, start: Token) -> exp.Command: 6014 while self._curr: 6015 self._advance() 6016 text = self._find_sql(start, self._prev) 6017 size = len(start.text) 6018 self._warn_unsupported() 6019 return exp.Command(this=text[:size], expression=text[size:]) 6020 6021 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6022 settings = [] 6023 6024 self._match_l_paren() 6025 kind = self._parse_id_var() 6026 6027 if self._match(TokenType.L_PAREN): 6028 while True: 6029 key = self._parse_id_var() 6030 value = self._parse_primary() 6031 6032 if not key and value is None: 6033 break 6034 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6035 self._match(TokenType.R_PAREN) 6036 6037 self._match_r_paren() 6038 6039 return self.expression( 6040 exp.DictProperty, 6041 this=this, 6042 kind=kind.this if kind else None, 6043 settings=settings, 6044 ) 6045 6046 def _parse_dict_range(self, this: str) -> exp.DictRange: 6047 self._match_l_paren() 6048 has_min = self._match_text_seq("MIN") 6049 if has_min: 6050 min = self._parse_var() or self._parse_primary() 6051 self._match_text_seq("MAX") 6052 max = self._parse_var() or self._parse_primary() 6053 else: 6054 max = self._parse_var() or self._parse_primary() 6055 min = exp.Literal.number(0) 6056 self._match_r_paren() 6057 return self.expression(exp.DictRange, this=this, min=min, max=max) 6058 6059 def _parse_comprehension( 6060 self, this: t.Optional[exp.Expression] 6061 ) -> t.Optional[exp.Comprehension]: 6062 index = self._index 6063 expression = self._parse_column() 6064 if not self._match(TokenType.IN): 6065 self._retreat(index - 1) 6066 return None 6067 iterator = self._parse_column() 6068 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6069 return self.expression( 6070 exp.Comprehension, 6071 this=this, 6072 expression=expression, 6073 iterator=iterator, 6074 condition=condition, 6075 ) 6076 6077 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6078 if self._match(TokenType.HEREDOC_STRING): 6079 return self.expression(exp.Heredoc, this=self._prev.text) 6080 6081 if not self._match_text_seq("$"): 6082 return None 6083 6084 tags = ["$"] 6085 tag_text = None 6086 6087 if self._is_connected(): 6088 self._advance() 6089 tags.append(self._prev.text.upper()) 6090 else: 6091 self.raise_error("No closing $ found") 6092 6093 if tags[-1] != "$": 6094 if self._is_connected() and self._match_text_seq("$"): 6095 tag_text = tags[-1] 6096 tags.append("$") 6097 else: 6098 self.raise_error("No closing $ found") 6099 6100 heredoc_start = self._curr 6101 6102 while self._curr: 6103 if self._match_text_seq(*tags, advance=False): 6104 this = self._find_sql(heredoc_start, self._prev) 6105 self._advance(len(tags)) 6106 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6107 6108 self._advance() 6109 6110 self.raise_error(f"No closing {''.join(tags)} found") 6111 return None 6112 6113 def _find_parser( 6114 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6115 ) -> t.Optional[t.Callable]: 6116 if not self._curr: 6117 return None 6118 6119 index = self._index 6120 this = [] 6121 while True: 6122 # The current token might be multiple words 6123 curr = self._curr.text.upper() 6124 key = curr.split(" ") 6125 this.append(curr) 6126 6127 self._advance() 6128 result, trie = in_trie(trie, key) 6129 if result == TrieResult.FAILED: 6130 break 6131 6132 if result == TrieResult.EXISTS: 6133 subparser = parsers[" ".join(this)] 6134 return subparser 6135 6136 self._retreat(index) 6137 return None 6138 6139 def _match(self, token_type, advance=True, expression=None): 6140 if not self._curr: 6141 return None 6142 6143 if self._curr.token_type == token_type: 6144 if advance: 6145 self._advance() 6146 self._add_comments(expression) 6147 return True 6148 6149 return None 6150 6151 def _match_set(self, types, advance=True): 6152 if not self._curr: 6153 return None 6154 6155 if self._curr.token_type in types: 6156 if advance: 6157 self._advance() 6158 return True 6159 6160 return None 6161 6162 def _match_pair(self, token_type_a, token_type_b, advance=True): 6163 if not self._curr or not self._next: 6164 return None 6165 6166 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6167 if advance: 6168 self._advance(2) 6169 return True 6170 6171 return None 6172 6173 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6174 if not self._match(TokenType.L_PAREN, expression=expression): 6175 self.raise_error("Expecting (") 6176 6177 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6178 if not self._match(TokenType.R_PAREN, expression=expression): 6179 self.raise_error("Expecting )") 6180 6181 def _match_texts(self, texts, advance=True): 6182 if self._curr and self._curr.text.upper() in texts: 6183 if advance: 6184 self._advance() 6185 return True 6186 return None 6187 6188 def _match_text_seq(self, *texts, advance=True): 6189 index = self._index 6190 for text in texts: 6191 if self._curr and self._curr.text.upper() == text: 6192 self._advance() 6193 else: 6194 self._retreat(index) 6195 return None 6196 6197 if not advance: 6198 self._retreat(index) 6199 6200 return True 6201 6202 def _replace_lambda( 6203 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6204 ) -> t.Optional[exp.Expression]: 6205 if not node: 6206 return node 6207 6208 for column in node.find_all(exp.Column): 6209 if column.parts[0].name in lambda_variables: 6210 dot_or_id = column.to_dot() if column.table else column.this 6211 parent = column.parent 6212 6213 while isinstance(parent, exp.Dot): 6214 if not isinstance(parent.parent, exp.Dot): 6215 parent.replace(dot_or_id) 6216 break 6217 parent = parent.parent 6218 else: 6219 if column is node: 6220 node = dot_or_id 6221 else: 6222 column.replace(dot_or_id) 6223 return node 6224 6225 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6226 start = self._prev 6227 6228 # Not to be confused with TRUNCATE(number, decimals) function call 6229 if self._match(TokenType.L_PAREN): 6230 self._retreat(self._index - 2) 6231 return self._parse_function() 6232 6233 # Clickhouse supports TRUNCATE DATABASE as well 6234 is_database = self._match(TokenType.DATABASE) 6235 6236 self._match(TokenType.TABLE) 6237 6238 exists = self._parse_exists(not_=False) 6239 6240 expressions = self._parse_csv( 6241 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6242 ) 6243 6244 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6245 6246 if self._match_text_seq("RESTART", "IDENTITY"): 6247 identity = "RESTART" 6248 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6249 identity = "CONTINUE" 6250 else: 6251 identity = None 6252 6253 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6254 option = self._prev.text 6255 else: 6256 option = None 6257 6258 partition = self._parse_partition() 6259 6260 # Fallback case 6261 if self._curr: 6262 return self._parse_as_command(start) 6263 6264 return self.expression( 6265 exp.TruncateTable, 6266 expressions=expressions, 6267 is_database=is_database, 6268 exists=exists, 6269 cluster=cluster, 6270 identity=identity, 6271 option=option, 6272 partition=partition, 6273 ) 6274 6275 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6276 this = self._parse_ordered(self._parse_opclass) 6277 6278 if not self._match(TokenType.WITH): 6279 return this 6280 6281 op = self._parse_var(any_token=True) 6282 6283 return self.expression(exp.WithOperator, this=this, op=op)
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
52def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 53 # Default argument order is base, expression 54 this = seq_get(args, 0) 55 expression = seq_get(args, 1) 56 57 if expression: 58 if not dialect.LOG_BASE_FIRST: 59 this, expression = expression, this 60 return exp.Log(this=this, expression=expression) 61 62 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
65def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 66 def _builder(args: t.List, dialect: Dialect) -> E: 67 expression = expr_type( 68 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 69 ) 70 if len(args) > 2 and expr_type is exp.JSONExtract: 71 expression.set("expressions", args[2:]) 72 73 return expression 74 75 return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } - {TokenType.IDENTIFIER} 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 } 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IDENTIFIER, 349 TokenType.IS, 350 TokenType.ISNULL, 351 TokenType.INTERVAL, 352 TokenType.KEEP, 353 TokenType.KILL, 354 TokenType.LEFT, 355 TokenType.LOAD, 356 TokenType.MERGE, 357 TokenType.NATURAL, 358 TokenType.NEXT, 359 TokenType.OFFSET, 360 TokenType.OPERATOR, 361 TokenType.ORDINALITY, 362 TokenType.OVERLAPS, 363 TokenType.OVERWRITE, 364 TokenType.PARTITION, 365 TokenType.PERCENT, 366 TokenType.PIVOT, 367 TokenType.PRAGMA, 368 TokenType.RANGE, 369 TokenType.RECURSIVE, 370 TokenType.REFERENCES, 371 TokenType.REFRESH, 372 TokenType.REPLACE, 373 TokenType.RIGHT, 374 TokenType.ROW, 375 TokenType.ROWS, 376 TokenType.SEMI, 377 TokenType.SET, 378 TokenType.SETTINGS, 379 TokenType.SHOW, 380 TokenType.TEMPORARY, 381 TokenType.TOP, 382 TokenType.TRUE, 383 TokenType.TRUNCATE, 384 TokenType.UNIQUE, 385 TokenType.UNPIVOT, 386 TokenType.UPDATE, 387 TokenType.USE, 388 TokenType.VOLATILE, 389 TokenType.WINDOW, 390 *CREATABLES, 391 *SUBQUERY_PREDICATES, 392 *TYPE_TOKENS, 393 *NO_PAREN_FUNCTIONS, 394 } 395 396 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 397 398 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 399 TokenType.ANTI, 400 TokenType.APPLY, 401 TokenType.ASOF, 402 TokenType.FULL, 403 TokenType.LEFT, 404 TokenType.LOCK, 405 TokenType.NATURAL, 406 TokenType.OFFSET, 407 TokenType.RIGHT, 408 TokenType.SEMI, 409 TokenType.WINDOW, 410 } 411 412 ALIAS_TOKENS = ID_VAR_TOKENS 413 414 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 415 416 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 417 418 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 419 420 FUNC_TOKENS = { 421 TokenType.COLLATE, 422 TokenType.COMMAND, 423 TokenType.CURRENT_DATE, 424 TokenType.CURRENT_DATETIME, 425 TokenType.CURRENT_TIMESTAMP, 426 TokenType.CURRENT_TIME, 427 TokenType.CURRENT_USER, 428 TokenType.FILTER, 429 TokenType.FIRST, 430 TokenType.FORMAT, 431 TokenType.GLOB, 432 TokenType.IDENTIFIER, 433 TokenType.INDEX, 434 TokenType.ISNULL, 435 TokenType.ILIKE, 436 TokenType.INSERT, 437 TokenType.LIKE, 438 TokenType.MERGE, 439 TokenType.OFFSET, 440 TokenType.PRIMARY_KEY, 441 TokenType.RANGE, 442 TokenType.REPLACE, 443 TokenType.RLIKE, 444 TokenType.ROW, 445 TokenType.UNNEST, 446 TokenType.VAR, 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.SEQUENCE, 450 TokenType.DATE, 451 TokenType.DATETIME, 452 TokenType.TABLE, 453 TokenType.TIMESTAMP, 454 TokenType.TIMESTAMPTZ, 455 TokenType.TRUNCATE, 456 TokenType.WINDOW, 457 TokenType.XOR, 458 *TYPE_TOKENS, 459 *SUBQUERY_PREDICATES, 460 } 461 462 CONJUNCTION = { 463 TokenType.AND: exp.And, 464 TokenType.OR: exp.Or, 465 } 466 467 EQUALITY = { 468 TokenType.COLON_EQ: exp.PropertyEQ, 469 TokenType.EQ: exp.EQ, 470 TokenType.NEQ: exp.NEQ, 471 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 472 } 473 474 COMPARISON = { 475 TokenType.GT: exp.GT, 476 TokenType.GTE: exp.GTE, 477 TokenType.LT: exp.LT, 478 TokenType.LTE: exp.LTE, 479 } 480 481 BITWISE = { 482 TokenType.AMP: exp.BitwiseAnd, 483 TokenType.CARET: exp.BitwiseXor, 484 TokenType.PIPE: exp.BitwiseOr, 485 } 486 487 TERM = { 488 TokenType.DASH: exp.Sub, 489 TokenType.PLUS: exp.Add, 490 TokenType.MOD: exp.Mod, 491 TokenType.COLLATE: exp.Collate, 492 } 493 494 FACTOR = { 495 TokenType.DIV: exp.IntDiv, 496 TokenType.LR_ARROW: exp.Distance, 497 TokenType.SLASH: exp.Div, 498 TokenType.STAR: exp.Mul, 499 } 500 501 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 502 503 TIMES = { 504 TokenType.TIME, 505 TokenType.TIMETZ, 506 } 507 508 TIMESTAMPS = { 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TIMESTAMPLTZ, 512 *TIMES, 513 } 514 515 SET_OPERATIONS = { 516 TokenType.UNION, 517 TokenType.INTERSECT, 518 TokenType.EXCEPT, 519 } 520 521 JOIN_METHODS = { 522 TokenType.ASOF, 523 TokenType.NATURAL, 524 TokenType.POSITIONAL, 525 } 526 527 JOIN_SIDES = { 528 TokenType.LEFT, 529 TokenType.RIGHT, 530 TokenType.FULL, 531 } 532 533 JOIN_KINDS = { 534 TokenType.INNER, 535 TokenType.OUTER, 536 TokenType.CROSS, 537 TokenType.SEMI, 538 TokenType.ANTI, 539 } 540 541 JOIN_HINTS: t.Set[str] = set() 542 543 LAMBDAS = { 544 TokenType.ARROW: lambda self, expressions: self.expression( 545 exp.Lambda, 546 this=self._replace_lambda( 547 self._parse_conjunction(), 548 {node.name for node in expressions}, 549 ), 550 expressions=expressions, 551 ), 552 TokenType.FARROW: lambda self, expressions: self.expression( 553 exp.Kwarg, 554 this=exp.var(expressions[0].name), 555 expression=self._parse_conjunction(), 556 ), 557 } 558 559 COLUMN_OPERATORS = { 560 TokenType.DOT: None, 561 TokenType.DCOLON: lambda self, this, to: self.expression( 562 exp.Cast if self.STRICT_CAST else exp.TryCast, 563 this=this, 564 to=to, 565 ), 566 TokenType.ARROW: lambda self, this, path: self.expression( 567 exp.JSONExtract, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.DARROW: lambda self, this, path: self.expression( 573 exp.JSONExtractScalar, 574 this=this, 575 expression=self.dialect.to_json_path(path), 576 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 577 ), 578 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtract, 580 this=this, 581 expression=path, 582 ), 583 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 584 exp.JSONBExtractScalar, 585 this=this, 586 expression=path, 587 ), 588 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 589 exp.JSONBContains, 590 this=this, 591 expression=key, 592 ), 593 } 594 595 EXPRESSION_PARSERS = { 596 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 597 exp.Column: lambda self: self._parse_column(), 598 exp.Condition: lambda self: self._parse_conjunction(), 599 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 600 exp.Expression: lambda self: self._parse_expression(), 601 exp.From: lambda self: self._parse_from(), 602 exp.Group: lambda self: self._parse_group(), 603 exp.Having: lambda self: self._parse_having(), 604 exp.Identifier: lambda self: self._parse_id_var(), 605 exp.Join: lambda self: self._parse_join(), 606 exp.Lambda: lambda self: self._parse_lambda(), 607 exp.Lateral: lambda self: self._parse_lateral(), 608 exp.Limit: lambda self: self._parse_limit(), 609 exp.Offset: lambda self: self._parse_offset(), 610 exp.Order: lambda self: self._parse_order(), 611 exp.Ordered: lambda self: self._parse_ordered(), 612 exp.Properties: lambda self: self._parse_properties(), 613 exp.Qualify: lambda self: self._parse_qualify(), 614 exp.Returning: lambda self: self._parse_returning(), 615 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 616 exp.Table: lambda self: self._parse_table_parts(), 617 exp.TableAlias: lambda self: self._parse_table_alias(), 618 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 619 exp.Where: lambda self: self._parse_where(), 620 exp.Window: lambda self: self._parse_named_window(), 621 exp.With: lambda self: self._parse_with(), 622 "JOIN_TYPE": lambda self: self._parse_join_parts(), 623 } 624 625 STATEMENT_PARSERS = { 626 TokenType.ALTER: lambda self: self._parse_alter(), 627 TokenType.BEGIN: lambda self: self._parse_transaction(), 628 TokenType.CACHE: lambda self: self._parse_cache(), 629 TokenType.COMMENT: lambda self: self._parse_comment(), 630 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 631 TokenType.CREATE: lambda self: self._parse_create(), 632 TokenType.DELETE: lambda self: self._parse_delete(), 633 TokenType.DESC: lambda self: self._parse_describe(), 634 TokenType.DESCRIBE: lambda self: self._parse_describe(), 635 TokenType.DROP: lambda self: self._parse_drop(), 636 TokenType.INSERT: lambda self: self._parse_insert(), 637 TokenType.KILL: lambda self: self._parse_kill(), 638 TokenType.LOAD: lambda self: self._parse_load(), 639 TokenType.MERGE: lambda self: self._parse_merge(), 640 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 641 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 642 TokenType.REFRESH: lambda self: self._parse_refresh(), 643 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 644 TokenType.SET: lambda self: self._parse_set(), 645 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 646 TokenType.UNCACHE: lambda self: self._parse_uncache(), 647 TokenType.UPDATE: lambda self: self._parse_update(), 648 TokenType.USE: lambda self: self.expression( 649 exp.Use, 650 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 651 this=self._parse_table(schema=False), 652 ), 653 } 654 655 UNARY_PARSERS = { 656 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 657 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 658 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 659 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 660 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 661 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 662 } 663 664 STRING_PARSERS = { 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 669 exp.National, this=token.text 670 ), 671 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 672 TokenType.STRING: lambda self, token: self.expression( 673 exp.Literal, this=token.text, is_string=True 674 ), 675 TokenType.UNICODE_STRING: lambda self, token: self.expression( 676 exp.UnicodeString, 677 this=token.text, 678 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 679 ), 680 } 681 682 NUMERIC_PARSERS = { 683 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 684 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 685 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 686 TokenType.NUMBER: lambda self, token: self.expression( 687 exp.Literal, this=token.text, is_string=False 688 ), 689 } 690 691 PRIMARY_PARSERS = { 692 **STRING_PARSERS, 693 **NUMERIC_PARSERS, 694 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 695 TokenType.NULL: lambda self, _: self.expression(exp.Null), 696 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 697 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 698 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 699 TokenType.STAR: lambda self, _: self.expression( 700 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 701 ), 702 } 703 704 PLACEHOLDER_PARSERS = { 705 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 706 TokenType.PARAMETER: lambda self: self._parse_parameter(), 707 TokenType.COLON: lambda self: ( 708 self.expression(exp.Placeholder, this=self._prev.text) 709 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 710 else None 711 ), 712 } 713 714 RANGE_PARSERS = { 715 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 716 TokenType.GLOB: binary_range_parser(exp.Glob), 717 TokenType.ILIKE: binary_range_parser(exp.ILike), 718 TokenType.IN: lambda self, this: self._parse_in(this), 719 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 720 TokenType.IS: lambda self, this: self._parse_is(this), 721 TokenType.LIKE: binary_range_parser(exp.Like), 722 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 723 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 724 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 725 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 726 } 727 728 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 729 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 730 "AUTO": lambda self: self._parse_auto_property(), 731 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 732 "BACKUP": lambda self: self.expression( 733 exp.BackupProperty, this=self._parse_var(any_token=True) 734 ), 735 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 736 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 738 "CHECKSUM": lambda self: self._parse_checksum(), 739 "CLUSTER BY": lambda self: self._parse_cluster(), 740 "CLUSTERED": lambda self: self._parse_clustered_by(), 741 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 742 exp.CollateProperty, **kwargs 743 ), 744 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 745 "CONTAINS": lambda self: self._parse_contains_property(), 746 "COPY": lambda self: self._parse_copy_property(), 747 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 748 "DEFINER": lambda self: self._parse_definer(), 749 "DETERMINISTIC": lambda self: self.expression( 750 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 751 ), 752 "DISTKEY": lambda self: self._parse_distkey(), 753 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 754 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 755 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 756 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 757 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 758 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 759 "FREESPACE": lambda self: self._parse_freespace(), 760 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 761 "HEAP": lambda self: self.expression(exp.HeapProperty), 762 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 763 "IMMUTABLE": lambda self: self.expression( 764 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 765 ), 766 "INHERITS": lambda self: self.expression( 767 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 768 ), 769 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 770 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 771 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 772 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 773 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 774 "LIKE": lambda self: self._parse_create_like(), 775 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 776 "LOCK": lambda self: self._parse_locking(), 777 "LOCKING": lambda self: self._parse_locking(), 778 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 779 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 780 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 781 "MODIFIES": lambda self: self._parse_modifies_property(), 782 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 783 "NO": lambda self: self._parse_no_property(), 784 "ON": lambda self: self._parse_on_property(), 785 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 786 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 787 "PARTITION": lambda self: self._parse_partitioned_of(), 788 "PARTITION BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 790 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 791 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 792 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 793 "READS": lambda self: self._parse_reads_property(), 794 "REMOTE": lambda self: self._parse_remote_with_connection(), 795 "RETURNS": lambda self: self._parse_returns(), 796 "ROW": lambda self: self._parse_row(), 797 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 798 "SAMPLE": lambda self: self.expression( 799 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 800 ), 801 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 802 "SETTINGS": lambda self: self.expression( 803 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 804 ), 805 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 806 "SORTKEY": lambda self: self._parse_sortkey(), 807 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 808 "STABLE": lambda self: self.expression( 809 exp.StabilityProperty, this=exp.Literal.string("STABLE") 810 ), 811 "STORED": lambda self: self._parse_stored(), 812 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 813 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 814 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 815 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 816 "TO": lambda self: self._parse_to_table(), 817 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 818 "TRANSFORM": lambda self: self.expression( 819 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 820 ), 821 "TTL": lambda self: self._parse_ttl(), 822 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 823 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 824 "VOLATILE": lambda self: self._parse_volatile_property(), 825 "WITH": lambda self: self._parse_with_property(), 826 } 827 828 CONSTRAINT_PARSERS = { 829 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 830 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 831 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 832 "CHARACTER SET": lambda self: self.expression( 833 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 834 ), 835 "CHECK": lambda self: self.expression( 836 exp.CheckColumnConstraint, 837 this=self._parse_wrapped(self._parse_conjunction), 838 enforced=self._match_text_seq("ENFORCED"), 839 ), 840 "COLLATE": lambda self: self.expression( 841 exp.CollateColumnConstraint, this=self._parse_var() 842 ), 843 "COMMENT": lambda self: self.expression( 844 exp.CommentColumnConstraint, this=self._parse_string() 845 ), 846 "COMPRESS": lambda self: self._parse_compress(), 847 "CLUSTERED": lambda self: self.expression( 848 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 849 ), 850 "NONCLUSTERED": lambda self: self.expression( 851 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 852 ), 853 "DEFAULT": lambda self: self.expression( 854 exp.DefaultColumnConstraint, this=self._parse_bitwise() 855 ), 856 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 857 "EPHEMERAL": lambda self: self.expression( 858 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 859 ), 860 "EXCLUDE": lambda self: self.expression( 861 exp.ExcludeColumnConstraint, this=self._parse_index_params() 862 ), 863 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 864 "FORMAT": lambda self: self.expression( 865 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 866 ), 867 "GENERATED": lambda self: self._parse_generated_as_identity(), 868 "IDENTITY": lambda self: self._parse_auto_increment(), 869 "INLINE": lambda self: self._parse_inline(), 870 "LIKE": lambda self: self._parse_create_like(), 871 "NOT": lambda self: self._parse_not_constraint(), 872 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 873 "ON": lambda self: ( 874 self._match(TokenType.UPDATE) 875 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 876 ) 877 or self.expression(exp.OnProperty, this=self._parse_id_var()), 878 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 879 "PERIOD": lambda self: self._parse_period_for_system_time(), 880 "PRIMARY KEY": lambda self: self._parse_primary_key(), 881 "REFERENCES": lambda self: self._parse_references(match=False), 882 "TITLE": lambda self: self.expression( 883 exp.TitleColumnConstraint, this=self._parse_var_or_string() 884 ), 885 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 886 "UNIQUE": lambda self: self._parse_unique(), 887 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 888 "WITH": lambda self: self.expression( 889 exp.Properties, expressions=self._parse_wrapped_properties() 890 ), 891 } 892 893 ALTER_PARSERS = { 894 "ADD": lambda self: self._parse_alter_table_add(), 895 "ALTER": lambda self: self._parse_alter_table_alter(), 896 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 897 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 898 "DROP": lambda self: self._parse_alter_table_drop(), 899 "RENAME": lambda self: self._parse_alter_table_rename(), 900 } 901 902 SCHEMA_UNNAMED_CONSTRAINTS = { 903 "CHECK", 904 "EXCLUDE", 905 "FOREIGN KEY", 906 "LIKE", 907 "PERIOD", 908 "PRIMARY KEY", 909 "UNIQUE", 910 } 911 912 NO_PAREN_FUNCTION_PARSERS = { 913 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 914 "CASE": lambda self: self._parse_case(), 915 "IF": lambda self: self._parse_if(), 916 "NEXT": lambda self: self._parse_next_value_for(), 917 } 918 919 INVALID_FUNC_NAME_TOKENS = { 920 TokenType.IDENTIFIER, 921 TokenType.STRING, 922 } 923 924 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 925 926 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 927 928 FUNCTION_PARSERS = { 929 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 930 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 931 "DECODE": lambda self: self._parse_decode(), 932 "EXTRACT": lambda self: self._parse_extract(), 933 "JSON_OBJECT": lambda self: self._parse_json_object(), 934 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 935 "JSON_TABLE": lambda self: self._parse_json_table(), 936 "MATCH": lambda self: self._parse_match_against(), 937 "OPENJSON": lambda self: self._parse_open_json(), 938 "POSITION": lambda self: self._parse_position(), 939 "PREDICT": lambda self: self._parse_predict(), 940 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 941 "STRING_AGG": lambda self: self._parse_string_agg(), 942 "SUBSTRING": lambda self: self._parse_substring(), 943 "TRIM": lambda self: self._parse_trim(), 944 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 945 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 946 } 947 948 QUERY_MODIFIER_PARSERS = { 949 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 950 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 951 TokenType.WHERE: lambda self: ("where", self._parse_where()), 952 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 953 TokenType.HAVING: lambda self: ("having", self._parse_having()), 954 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 955 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 956 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 957 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 958 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 959 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 960 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 961 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 962 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 963 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 964 TokenType.CLUSTER_BY: lambda self: ( 965 "cluster", 966 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 967 ), 968 TokenType.DISTRIBUTE_BY: lambda self: ( 969 "distribute", 970 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 971 ), 972 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 973 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 974 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 975 } 976 977 SET_PARSERS = { 978 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 979 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 980 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 981 "TRANSACTION": lambda self: self._parse_set_transaction(), 982 } 983 984 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 985 986 TYPE_LITERAL_PARSERS = { 987 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 988 } 989 990 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 991 992 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 993 994 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 995 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 996 "ISOLATION": ( 997 ("LEVEL", "REPEATABLE", "READ"), 998 ("LEVEL", "READ", "COMMITTED"), 999 ("LEVEL", "READ", "UNCOMITTED"), 1000 ("LEVEL", "SERIALIZABLE"), 1001 ), 1002 "READ": ("WRITE", "ONLY"), 1003 } 1004 1005 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1006 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1007 ) 1008 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1009 1010 CREATE_SEQUENCE: OPTIONS_TYPE = { 1011 "SCALE": ("EXTEND", "NOEXTEND"), 1012 "SHARD": ("EXTEND", "NOEXTEND"), 1013 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1014 **dict.fromkeys( 1015 ( 1016 "SESSION", 1017 "GLOBAL", 1018 "KEEP", 1019 "NOKEEP", 1020 "ORDER", 1021 "NOORDER", 1022 "NOCACHE", 1023 "CYCLE", 1024 "NOCYCLE", 1025 "NOMINVALUE", 1026 "NOMAXVALUE", 1027 "NOSCALE", 1028 "NOSHARD", 1029 ), 1030 tuple(), 1031 ), 1032 } 1033 1034 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1035 1036 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1037 1038 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1039 1040 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1041 1042 CLONE_KEYWORDS = {"CLONE", "COPY"} 1043 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1044 1045 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1046 1047 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1048 1049 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1050 1051 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1052 1053 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1054 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1055 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1056 1057 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1058 1059 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1060 1061 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1062 1063 DISTINCT_TOKENS = {TokenType.DISTINCT} 1064 1065 NULL_TOKENS = {TokenType.NULL} 1066 1067 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1068 1069 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1070 1071 STRICT_CAST = True 1072 1073 PREFIXED_PIVOT_COLUMNS = False 1074 IDENTIFY_PIVOT_STRINGS = False 1075 1076 LOG_DEFAULTS_TO_LN = False 1077 1078 # Whether ADD is present for each column added by ALTER TABLE 1079 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1080 1081 # Whether the table sample clause expects CSV syntax 1082 TABLESAMPLE_CSV = False 1083 1084 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1085 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1086 1087 # Whether the TRIM function expects the characters to trim as its first argument 1088 TRIM_PATTERN_FIRST = False 1089 1090 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1091 STRING_ALIASES = False 1092 1093 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1094 MODIFIERS_ATTACHED_TO_UNION = True 1095 UNION_MODIFIERS = {"order", "limit", "offset"} 1096 1097 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1098 NO_PAREN_IF_COMMANDS = True 1099 1100 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1101 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1102 1103 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1104 # If this is True and '(' is not found, the keyword will be treated as an identifier 1105 VALUES_FOLLOWED_BY_PAREN = True 1106 1107 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1108 SUPPORTS_IMPLICIT_UNNEST = False 1109 1110 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1111 INTERVAL_SPANS = True 1112 1113 # Whether a PARTITION clause can follow a table reference 1114 SUPPORTS_PARTITION_SELECTION = False 1115 1116 __slots__ = ( 1117 "error_level", 1118 "error_message_context", 1119 "max_errors", 1120 "dialect", 1121 "sql", 1122 "errors", 1123 "_tokens", 1124 "_index", 1125 "_curr", 1126 "_next", 1127 "_prev", 1128 "_prev_comments", 1129 ) 1130 1131 # Autofilled 1132 SHOW_TRIE: t.Dict = {} 1133 SET_TRIE: t.Dict = {} 1134 1135 def __init__( 1136 self, 1137 error_level: t.Optional[ErrorLevel] = None, 1138 error_message_context: int = 100, 1139 max_errors: int = 3, 1140 dialect: DialectType = None, 1141 ): 1142 from sqlglot.dialects import Dialect 1143 1144 self.error_level = error_level or ErrorLevel.IMMEDIATE 1145 self.error_message_context = error_message_context 1146 self.max_errors = max_errors 1147 self.dialect = Dialect.get_or_raise(dialect) 1148 self.reset() 1149 1150 def reset(self): 1151 self.sql = "" 1152 self.errors = [] 1153 self._tokens = [] 1154 self._index = 0 1155 self._curr = None 1156 self._next = None 1157 self._prev = None 1158 self._prev_comments = None 1159 1160 def parse( 1161 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1162 ) -> t.List[t.Optional[exp.Expression]]: 1163 """ 1164 Parses a list of tokens and returns a list of syntax trees, one tree 1165 per parsed SQL statement. 1166 1167 Args: 1168 raw_tokens: The list of tokens. 1169 sql: The original SQL string, used to produce helpful debug messages. 1170 1171 Returns: 1172 The list of the produced syntax trees. 1173 """ 1174 return self._parse( 1175 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1176 ) 1177 1178 def parse_into( 1179 self, 1180 expression_types: exp.IntoType, 1181 raw_tokens: t.List[Token], 1182 sql: t.Optional[str] = None, 1183 ) -> t.List[t.Optional[exp.Expression]]: 1184 """ 1185 Parses a list of tokens into a given Expression type. If a collection of Expression 1186 types is given instead, this method will try to parse the token list into each one 1187 of them, stopping at the first for which the parsing succeeds. 1188 1189 Args: 1190 expression_types: The expression type(s) to try and parse the token list into. 1191 raw_tokens: The list of tokens. 1192 sql: The original SQL string, used to produce helpful debug messages. 1193 1194 Returns: 1195 The target Expression. 1196 """ 1197 errors = [] 1198 for expression_type in ensure_list(expression_types): 1199 parser = self.EXPRESSION_PARSERS.get(expression_type) 1200 if not parser: 1201 raise TypeError(f"No parser registered for {expression_type}") 1202 1203 try: 1204 return self._parse(parser, raw_tokens, sql) 1205 except ParseError as e: 1206 e.errors[0]["into_expression"] = expression_type 1207 errors.append(e) 1208 1209 raise ParseError( 1210 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1211 errors=merge_errors(errors), 1212 ) from errors[-1] 1213 1214 def _parse( 1215 self, 1216 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1217 raw_tokens: t.List[Token], 1218 sql: t.Optional[str] = None, 1219 ) -> t.List[t.Optional[exp.Expression]]: 1220 self.reset() 1221 self.sql = sql or "" 1222 1223 total = len(raw_tokens) 1224 chunks: t.List[t.List[Token]] = [[]] 1225 1226 for i, token in enumerate(raw_tokens): 1227 if token.token_type == TokenType.SEMICOLON: 1228 if i < total - 1: 1229 chunks.append([]) 1230 else: 1231 chunks[-1].append(token) 1232 1233 expressions = [] 1234 1235 for tokens in chunks: 1236 self._index = -1 1237 self._tokens = tokens 1238 self._advance() 1239 1240 expressions.append(parse_method(self)) 1241 1242 if self._index < len(self._tokens): 1243 self.raise_error("Invalid expression / Unexpected token") 1244 1245 self.check_errors() 1246 1247 return expressions 1248 1249 def check_errors(self) -> None: 1250 """Logs or raises any found errors, depending on the chosen error level setting.""" 1251 if self.error_level == ErrorLevel.WARN: 1252 for error in self.errors: 1253 logger.error(str(error)) 1254 elif self.error_level == ErrorLevel.RAISE and self.errors: 1255 raise ParseError( 1256 concat_messages(self.errors, self.max_errors), 1257 errors=merge_errors(self.errors), 1258 ) 1259 1260 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1261 """ 1262 Appends an error in the list of recorded errors or raises it, depending on the chosen 1263 error level setting. 1264 """ 1265 token = token or self._curr or self._prev or Token.string("") 1266 start = token.start 1267 end = token.end + 1 1268 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1269 highlight = self.sql[start:end] 1270 end_context = self.sql[end : end + self.error_message_context] 1271 1272 error = ParseError.new( 1273 f"{message}. Line {token.line}, Col: {token.col}.\n" 1274 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1275 description=message, 1276 line=token.line, 1277 col=token.col, 1278 start_context=start_context, 1279 highlight=highlight, 1280 end_context=end_context, 1281 ) 1282 1283 if self.error_level == ErrorLevel.IMMEDIATE: 1284 raise error 1285 1286 self.errors.append(error) 1287 1288 def expression( 1289 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1290 ) -> E: 1291 """ 1292 Creates a new, validated Expression. 1293 1294 Args: 1295 exp_class: The expression class to instantiate. 1296 comments: An optional list of comments to attach to the expression. 1297 kwargs: The arguments to set for the expression along with their respective values. 1298 1299 Returns: 1300 The target expression. 1301 """ 1302 instance = exp_class(**kwargs) 1303 instance.add_comments(comments) if comments else self._add_comments(instance) 1304 return self.validate_expression(instance) 1305 1306 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1307 if expression and self._prev_comments: 1308 expression.add_comments(self._prev_comments) 1309 self._prev_comments = None 1310 1311 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1312 """ 1313 Validates an Expression, making sure that all its mandatory arguments are set. 1314 1315 Args: 1316 expression: The expression to validate. 1317 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1318 1319 Returns: 1320 The validated expression. 1321 """ 1322 if self.error_level != ErrorLevel.IGNORE: 1323 for error_message in expression.error_messages(args): 1324 self.raise_error(error_message) 1325 1326 return expression 1327 1328 def _find_sql(self, start: Token, end: Token) -> str: 1329 return self.sql[start.start : end.end + 1] 1330 1331 def _is_connected(self) -> bool: 1332 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1333 1334 def _advance(self, times: int = 1) -> None: 1335 self._index += times 1336 self._curr = seq_get(self._tokens, self._index) 1337 self._next = seq_get(self._tokens, self._index + 1) 1338 1339 if self._index > 0: 1340 self._prev = self._tokens[self._index - 1] 1341 self._prev_comments = self._prev.comments 1342 else: 1343 self._prev = None 1344 self._prev_comments = None 1345 1346 def _retreat(self, index: int) -> None: 1347 if index != self._index: 1348 self._advance(index - self._index) 1349 1350 def _warn_unsupported(self) -> None: 1351 if len(self._tokens) <= 1: 1352 return 1353 1354 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1355 # interested in emitting a warning for the one being currently processed. 1356 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1357 1358 logger.warning( 1359 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1360 ) 1361 1362 def _parse_command(self) -> exp.Command: 1363 self._warn_unsupported() 1364 return self.expression( 1365 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1366 ) 1367 1368 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1369 """ 1370 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1371 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1372 the parser state accordingly 1373 """ 1374 index = self._index 1375 error_level = self.error_level 1376 1377 self.error_level = ErrorLevel.IMMEDIATE 1378 try: 1379 this = parse_method() 1380 except ParseError: 1381 this = None 1382 finally: 1383 if not this or retreat: 1384 self._retreat(index) 1385 self.error_level = error_level 1386 1387 return this 1388 1389 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1390 start = self._prev 1391 exists = self._parse_exists() if allow_exists else None 1392 1393 self._match(TokenType.ON) 1394 1395 materialized = self._match_text_seq("MATERIALIZED") 1396 kind = self._match_set(self.CREATABLES) and self._prev 1397 if not kind: 1398 return self._parse_as_command(start) 1399 1400 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1401 this = self._parse_user_defined_function(kind=kind.token_type) 1402 elif kind.token_type == TokenType.TABLE: 1403 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1404 elif kind.token_type == TokenType.COLUMN: 1405 this = self._parse_column() 1406 else: 1407 this = self._parse_id_var() 1408 1409 self._match(TokenType.IS) 1410 1411 return self.expression( 1412 exp.Comment, 1413 this=this, 1414 kind=kind.text, 1415 expression=self._parse_string(), 1416 exists=exists, 1417 materialized=materialized, 1418 ) 1419 1420 def _parse_to_table( 1421 self, 1422 ) -> exp.ToTableProperty: 1423 table = self._parse_table_parts(schema=True) 1424 return self.expression(exp.ToTableProperty, this=table) 1425 1426 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1427 def _parse_ttl(self) -> exp.Expression: 1428 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1429 this = self._parse_bitwise() 1430 1431 if self._match_text_seq("DELETE"): 1432 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1433 if self._match_text_seq("RECOMPRESS"): 1434 return self.expression( 1435 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1436 ) 1437 if self._match_text_seq("TO", "DISK"): 1438 return self.expression( 1439 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1440 ) 1441 if self._match_text_seq("TO", "VOLUME"): 1442 return self.expression( 1443 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1444 ) 1445 1446 return this 1447 1448 expressions = self._parse_csv(_parse_ttl_action) 1449 where = self._parse_where() 1450 group = self._parse_group() 1451 1452 aggregates = None 1453 if group and self._match(TokenType.SET): 1454 aggregates = self._parse_csv(self._parse_set_item) 1455 1456 return self.expression( 1457 exp.MergeTreeTTL, 1458 expressions=expressions, 1459 where=where, 1460 group=group, 1461 aggregates=aggregates, 1462 ) 1463 1464 def _parse_statement(self) -> t.Optional[exp.Expression]: 1465 if self._curr is None: 1466 return None 1467 1468 if self._match_set(self.STATEMENT_PARSERS): 1469 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1470 1471 if self._match_set(Tokenizer.COMMANDS): 1472 return self._parse_command() 1473 1474 expression = self._parse_expression() 1475 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1476 return self._parse_query_modifiers(expression) 1477 1478 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1479 start = self._prev 1480 temporary = self._match(TokenType.TEMPORARY) 1481 materialized = self._match_text_seq("MATERIALIZED") 1482 1483 kind = self._match_set(self.CREATABLES) and self._prev.text 1484 if not kind: 1485 return self._parse_as_command(start) 1486 1487 if_exists = exists or self._parse_exists() 1488 table = self._parse_table_parts( 1489 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1490 ) 1491 1492 if self._match(TokenType.L_PAREN, advance=False): 1493 expressions = self._parse_wrapped_csv(self._parse_types) 1494 else: 1495 expressions = None 1496 1497 return self.expression( 1498 exp.Drop, 1499 comments=start.comments, 1500 exists=if_exists, 1501 this=table, 1502 expressions=expressions, 1503 kind=kind, 1504 temporary=temporary, 1505 materialized=materialized, 1506 cascade=self._match_text_seq("CASCADE"), 1507 constraints=self._match_text_seq("CONSTRAINTS"), 1508 purge=self._match_text_seq("PURGE"), 1509 ) 1510 1511 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1512 return ( 1513 self._match_text_seq("IF") 1514 and (not not_ or self._match(TokenType.NOT)) 1515 and self._match(TokenType.EXISTS) 1516 ) 1517 1518 def _parse_create(self) -> exp.Create | exp.Command: 1519 # Note: this can't be None because we've matched a statement parser 1520 start = self._prev 1521 comments = self._prev_comments 1522 1523 replace = ( 1524 start.token_type == TokenType.REPLACE 1525 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1526 or self._match_pair(TokenType.OR, TokenType.ALTER) 1527 ) 1528 1529 unique = self._match(TokenType.UNIQUE) 1530 1531 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1532 self._advance() 1533 1534 properties = None 1535 create_token = self._match_set(self.CREATABLES) and self._prev 1536 1537 if not create_token: 1538 # exp.Properties.Location.POST_CREATE 1539 properties = self._parse_properties() 1540 create_token = self._match_set(self.CREATABLES) and self._prev 1541 1542 if not properties or not create_token: 1543 return self._parse_as_command(start) 1544 1545 exists = self._parse_exists(not_=True) 1546 this = None 1547 expression: t.Optional[exp.Expression] = None 1548 indexes = None 1549 no_schema_binding = None 1550 begin = None 1551 end = None 1552 clone = None 1553 1554 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1555 nonlocal properties 1556 if properties and temp_props: 1557 properties.expressions.extend(temp_props.expressions) 1558 elif temp_props: 1559 properties = temp_props 1560 1561 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1562 this = self._parse_user_defined_function(kind=create_token.token_type) 1563 1564 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1565 extend_props(self._parse_properties()) 1566 1567 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1568 1569 if not expression: 1570 if self._match(TokenType.COMMAND): 1571 expression = self._parse_as_command(self._prev) 1572 else: 1573 begin = self._match(TokenType.BEGIN) 1574 return_ = self._match_text_seq("RETURN") 1575 1576 if self._match(TokenType.STRING, advance=False): 1577 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1578 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1579 expression = self._parse_string() 1580 extend_props(self._parse_properties()) 1581 else: 1582 expression = self._parse_statement() 1583 1584 end = self._match_text_seq("END") 1585 1586 if return_: 1587 expression = self.expression(exp.Return, this=expression) 1588 elif create_token.token_type == TokenType.INDEX: 1589 this = self._parse_index(index=self._parse_id_var()) 1590 elif create_token.token_type in self.DB_CREATABLES: 1591 table_parts = self._parse_table_parts( 1592 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1593 ) 1594 1595 # exp.Properties.Location.POST_NAME 1596 self._match(TokenType.COMMA) 1597 extend_props(self._parse_properties(before=True)) 1598 1599 this = self._parse_schema(this=table_parts) 1600 1601 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1602 extend_props(self._parse_properties()) 1603 1604 self._match(TokenType.ALIAS) 1605 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1606 # exp.Properties.Location.POST_ALIAS 1607 extend_props(self._parse_properties()) 1608 1609 if create_token.token_type == TokenType.SEQUENCE: 1610 expression = self._parse_types() 1611 extend_props(self._parse_properties()) 1612 else: 1613 expression = self._parse_ddl_select() 1614 1615 if create_token.token_type == TokenType.TABLE: 1616 # exp.Properties.Location.POST_EXPRESSION 1617 extend_props(self._parse_properties()) 1618 1619 indexes = [] 1620 while True: 1621 index = self._parse_index() 1622 1623 # exp.Properties.Location.POST_INDEX 1624 extend_props(self._parse_properties()) 1625 1626 if not index: 1627 break 1628 else: 1629 self._match(TokenType.COMMA) 1630 indexes.append(index) 1631 elif create_token.token_type == TokenType.VIEW: 1632 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1633 no_schema_binding = True 1634 1635 shallow = self._match_text_seq("SHALLOW") 1636 1637 if self._match_texts(self.CLONE_KEYWORDS): 1638 copy = self._prev.text.lower() == "copy" 1639 clone = self.expression( 1640 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1641 ) 1642 1643 if self._curr: 1644 return self._parse_as_command(start) 1645 1646 return self.expression( 1647 exp.Create, 1648 comments=comments, 1649 this=this, 1650 kind=create_token.text.upper(), 1651 replace=replace, 1652 unique=unique, 1653 expression=expression, 1654 exists=exists, 1655 properties=properties, 1656 indexes=indexes, 1657 no_schema_binding=no_schema_binding, 1658 begin=begin, 1659 end=end, 1660 clone=clone, 1661 ) 1662 1663 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1664 seq = exp.SequenceProperties() 1665 1666 options = [] 1667 index = self._index 1668 1669 while self._curr: 1670 if self._match_text_seq("INCREMENT"): 1671 self._match_text_seq("BY") 1672 self._match_text_seq("=") 1673 seq.set("increment", self._parse_term()) 1674 elif self._match_text_seq("MINVALUE"): 1675 seq.set("minvalue", self._parse_term()) 1676 elif self._match_text_seq("MAXVALUE"): 1677 seq.set("maxvalue", self._parse_term()) 1678 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1679 self._match_text_seq("=") 1680 seq.set("start", self._parse_term()) 1681 elif self._match_text_seq("CACHE"): 1682 # T-SQL allows empty CACHE which is initialized dynamically 1683 seq.set("cache", self._parse_number() or True) 1684 elif self._match_text_seq("OWNED", "BY"): 1685 # "OWNED BY NONE" is the default 1686 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1687 else: 1688 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1689 if opt: 1690 options.append(opt) 1691 else: 1692 break 1693 1694 seq.set("options", options if options else None) 1695 return None if self._index == index else seq 1696 1697 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1698 # only used for teradata currently 1699 self._match(TokenType.COMMA) 1700 1701 kwargs = { 1702 "no": self._match_text_seq("NO"), 1703 "dual": self._match_text_seq("DUAL"), 1704 "before": self._match_text_seq("BEFORE"), 1705 "default": self._match_text_seq("DEFAULT"), 1706 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1707 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1708 "after": self._match_text_seq("AFTER"), 1709 "minimum": self._match_texts(("MIN", "MINIMUM")), 1710 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1711 } 1712 1713 if self._match_texts(self.PROPERTY_PARSERS): 1714 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1715 try: 1716 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1717 except TypeError: 1718 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1719 1720 return None 1721 1722 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1723 return self._parse_wrapped_csv(self._parse_property) 1724 1725 def _parse_property(self) -> t.Optional[exp.Expression]: 1726 if self._match_texts(self.PROPERTY_PARSERS): 1727 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1728 1729 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1730 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1731 1732 if self._match_text_seq("COMPOUND", "SORTKEY"): 1733 return self._parse_sortkey(compound=True) 1734 1735 if self._match_text_seq("SQL", "SECURITY"): 1736 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1737 1738 index = self._index 1739 key = self._parse_column() 1740 1741 if not self._match(TokenType.EQ): 1742 self._retreat(index) 1743 return self._parse_sequence_properties() 1744 1745 return self.expression( 1746 exp.Property, 1747 this=key.to_dot() if isinstance(key, exp.Column) else key, 1748 value=self._parse_bitwise() or self._parse_var(any_token=True), 1749 ) 1750 1751 def _parse_stored(self) -> exp.FileFormatProperty: 1752 self._match(TokenType.ALIAS) 1753 1754 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1755 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1756 1757 return self.expression( 1758 exp.FileFormatProperty, 1759 this=( 1760 self.expression( 1761 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1762 ) 1763 if input_format or output_format 1764 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1765 ), 1766 ) 1767 1768 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1769 self._match(TokenType.EQ) 1770 self._match(TokenType.ALIAS) 1771 field = self._parse_field() 1772 if isinstance(field, exp.Identifier) and not field.quoted: 1773 field = exp.var(field) 1774 1775 return self.expression(exp_class, this=field, **kwargs) 1776 1777 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1778 properties = [] 1779 while True: 1780 if before: 1781 prop = self._parse_property_before() 1782 else: 1783 prop = self._parse_property() 1784 if not prop: 1785 break 1786 for p in ensure_list(prop): 1787 properties.append(p) 1788 1789 if properties: 1790 return self.expression(exp.Properties, expressions=properties) 1791 1792 return None 1793 1794 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1795 return self.expression( 1796 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1797 ) 1798 1799 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1800 if self._index >= 2: 1801 pre_volatile_token = self._tokens[self._index - 2] 1802 else: 1803 pre_volatile_token = None 1804 1805 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1806 return exp.VolatileProperty() 1807 1808 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1809 1810 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1811 self._match_pair(TokenType.EQ, TokenType.ON) 1812 1813 prop = self.expression(exp.WithSystemVersioningProperty) 1814 if self._match(TokenType.L_PAREN): 1815 self._match_text_seq("HISTORY_TABLE", "=") 1816 prop.set("this", self._parse_table_parts()) 1817 1818 if self._match(TokenType.COMMA): 1819 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1820 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1821 1822 self._match_r_paren() 1823 1824 return prop 1825 1826 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1827 if self._match(TokenType.L_PAREN, advance=False): 1828 return self._parse_wrapped_properties() 1829 1830 if self._match_text_seq("JOURNAL"): 1831 return self._parse_withjournaltable() 1832 1833 if self._match_texts(self.VIEW_ATTRIBUTES): 1834 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1835 1836 if self._match_text_seq("DATA"): 1837 return self._parse_withdata(no=False) 1838 elif self._match_text_seq("NO", "DATA"): 1839 return self._parse_withdata(no=True) 1840 1841 if not self._next: 1842 return None 1843 1844 return self._parse_withisolatedloading() 1845 1846 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1847 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1848 self._match(TokenType.EQ) 1849 1850 user = self._parse_id_var() 1851 self._match(TokenType.PARAMETER) 1852 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1853 1854 if not user or not host: 1855 return None 1856 1857 return exp.DefinerProperty(this=f"{user}@{host}") 1858 1859 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1860 self._match(TokenType.TABLE) 1861 self._match(TokenType.EQ) 1862 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1863 1864 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1865 return self.expression(exp.LogProperty, no=no) 1866 1867 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1868 return self.expression(exp.JournalProperty, **kwargs) 1869 1870 def _parse_checksum(self) -> exp.ChecksumProperty: 1871 self._match(TokenType.EQ) 1872 1873 on = None 1874 if self._match(TokenType.ON): 1875 on = True 1876 elif self._match_text_seq("OFF"): 1877 on = False 1878 1879 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1880 1881 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1882 return self.expression( 1883 exp.Cluster, 1884 expressions=( 1885 self._parse_wrapped_csv(self._parse_ordered) 1886 if wrapped 1887 else self._parse_csv(self._parse_ordered) 1888 ), 1889 ) 1890 1891 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1892 self._match_text_seq("BY") 1893 1894 self._match_l_paren() 1895 expressions = self._parse_csv(self._parse_column) 1896 self._match_r_paren() 1897 1898 if self._match_text_seq("SORTED", "BY"): 1899 self._match_l_paren() 1900 sorted_by = self._parse_csv(self._parse_ordered) 1901 self._match_r_paren() 1902 else: 1903 sorted_by = None 1904 1905 self._match(TokenType.INTO) 1906 buckets = self._parse_number() 1907 self._match_text_seq("BUCKETS") 1908 1909 return self.expression( 1910 exp.ClusteredByProperty, 1911 expressions=expressions, 1912 sorted_by=sorted_by, 1913 buckets=buckets, 1914 ) 1915 1916 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1917 if not self._match_text_seq("GRANTS"): 1918 self._retreat(self._index - 1) 1919 return None 1920 1921 return self.expression(exp.CopyGrantsProperty) 1922 1923 def _parse_freespace(self) -> exp.FreespaceProperty: 1924 self._match(TokenType.EQ) 1925 return self.expression( 1926 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1927 ) 1928 1929 def _parse_mergeblockratio( 1930 self, no: bool = False, default: bool = False 1931 ) -> exp.MergeBlockRatioProperty: 1932 if self._match(TokenType.EQ): 1933 return self.expression( 1934 exp.MergeBlockRatioProperty, 1935 this=self._parse_number(), 1936 percent=self._match(TokenType.PERCENT), 1937 ) 1938 1939 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1940 1941 def _parse_datablocksize( 1942 self, 1943 default: t.Optional[bool] = None, 1944 minimum: t.Optional[bool] = None, 1945 maximum: t.Optional[bool] = None, 1946 ) -> exp.DataBlocksizeProperty: 1947 self._match(TokenType.EQ) 1948 size = self._parse_number() 1949 1950 units = None 1951 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1952 units = self._prev.text 1953 1954 return self.expression( 1955 exp.DataBlocksizeProperty, 1956 size=size, 1957 units=units, 1958 default=default, 1959 minimum=minimum, 1960 maximum=maximum, 1961 ) 1962 1963 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1964 self._match(TokenType.EQ) 1965 always = self._match_text_seq("ALWAYS") 1966 manual = self._match_text_seq("MANUAL") 1967 never = self._match_text_seq("NEVER") 1968 default = self._match_text_seq("DEFAULT") 1969 1970 autotemp = None 1971 if self._match_text_seq("AUTOTEMP"): 1972 autotemp = self._parse_schema() 1973 1974 return self.expression( 1975 exp.BlockCompressionProperty, 1976 always=always, 1977 manual=manual, 1978 never=never, 1979 default=default, 1980 autotemp=autotemp, 1981 ) 1982 1983 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1984 index = self._index 1985 no = self._match_text_seq("NO") 1986 concurrent = self._match_text_seq("CONCURRENT") 1987 1988 if not self._match_text_seq("ISOLATED", "LOADING"): 1989 self._retreat(index) 1990 return None 1991 1992 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1993 return self.expression( 1994 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1995 ) 1996 1997 def _parse_locking(self) -> exp.LockingProperty: 1998 if self._match(TokenType.TABLE): 1999 kind = "TABLE" 2000 elif self._match(TokenType.VIEW): 2001 kind = "VIEW" 2002 elif self._match(TokenType.ROW): 2003 kind = "ROW" 2004 elif self._match_text_seq("DATABASE"): 2005 kind = "DATABASE" 2006 else: 2007 kind = None 2008 2009 if kind in ("DATABASE", "TABLE", "VIEW"): 2010 this = self._parse_table_parts() 2011 else: 2012 this = None 2013 2014 if self._match(TokenType.FOR): 2015 for_or_in = "FOR" 2016 elif self._match(TokenType.IN): 2017 for_or_in = "IN" 2018 else: 2019 for_or_in = None 2020 2021 if self._match_text_seq("ACCESS"): 2022 lock_type = "ACCESS" 2023 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2024 lock_type = "EXCLUSIVE" 2025 elif self._match_text_seq("SHARE"): 2026 lock_type = "SHARE" 2027 elif self._match_text_seq("READ"): 2028 lock_type = "READ" 2029 elif self._match_text_seq("WRITE"): 2030 lock_type = "WRITE" 2031 elif self._match_text_seq("CHECKSUM"): 2032 lock_type = "CHECKSUM" 2033 else: 2034 lock_type = None 2035 2036 override = self._match_text_seq("OVERRIDE") 2037 2038 return self.expression( 2039 exp.LockingProperty, 2040 this=this, 2041 kind=kind, 2042 for_or_in=for_or_in, 2043 lock_type=lock_type, 2044 override=override, 2045 ) 2046 2047 def _parse_partition_by(self) -> t.List[exp.Expression]: 2048 if self._match(TokenType.PARTITION_BY): 2049 return self._parse_csv(self._parse_conjunction) 2050 return [] 2051 2052 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2053 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2054 if self._match_text_seq("MINVALUE"): 2055 return exp.var("MINVALUE") 2056 if self._match_text_seq("MAXVALUE"): 2057 return exp.var("MAXVALUE") 2058 return self._parse_bitwise() 2059 2060 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2061 expression = None 2062 from_expressions = None 2063 to_expressions = None 2064 2065 if self._match(TokenType.IN): 2066 this = self._parse_wrapped_csv(self._parse_bitwise) 2067 elif self._match(TokenType.FROM): 2068 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2069 self._match_text_seq("TO") 2070 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2071 elif self._match_text_seq("WITH", "(", "MODULUS"): 2072 this = self._parse_number() 2073 self._match_text_seq(",", "REMAINDER") 2074 expression = self._parse_number() 2075 self._match_r_paren() 2076 else: 2077 self.raise_error("Failed to parse partition bound spec.") 2078 2079 return self.expression( 2080 exp.PartitionBoundSpec, 2081 this=this, 2082 expression=expression, 2083 from_expressions=from_expressions, 2084 to_expressions=to_expressions, 2085 ) 2086 2087 # https://www.postgresql.org/docs/current/sql-createtable.html 2088 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2089 if not self._match_text_seq("OF"): 2090 self._retreat(self._index - 1) 2091 return None 2092 2093 this = self._parse_table(schema=True) 2094 2095 if self._match(TokenType.DEFAULT): 2096 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2097 elif self._match_text_seq("FOR", "VALUES"): 2098 expression = self._parse_partition_bound_spec() 2099 else: 2100 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2101 2102 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2103 2104 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2105 self._match(TokenType.EQ) 2106 return self.expression( 2107 exp.PartitionedByProperty, 2108 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2109 ) 2110 2111 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2112 if self._match_text_seq("AND", "STATISTICS"): 2113 statistics = True 2114 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2115 statistics = False 2116 else: 2117 statistics = None 2118 2119 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2120 2121 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2122 if self._match_text_seq("SQL"): 2123 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2124 return None 2125 2126 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2127 if self._match_text_seq("SQL", "DATA"): 2128 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2129 return None 2130 2131 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2132 if self._match_text_seq("PRIMARY", "INDEX"): 2133 return exp.NoPrimaryIndexProperty() 2134 if self._match_text_seq("SQL"): 2135 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2136 return None 2137 2138 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2139 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2140 return exp.OnCommitProperty() 2141 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2142 return exp.OnCommitProperty(delete=True) 2143 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2144 2145 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2146 if self._match_text_seq("SQL", "DATA"): 2147 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2148 return None 2149 2150 def _parse_distkey(self) -> exp.DistKeyProperty: 2151 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2152 2153 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2154 table = self._parse_table(schema=True) 2155 2156 options = [] 2157 while self._match_texts(("INCLUDING", "EXCLUDING")): 2158 this = self._prev.text.upper() 2159 2160 id_var = self._parse_id_var() 2161 if not id_var: 2162 return None 2163 2164 options.append( 2165 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2166 ) 2167 2168 return self.expression(exp.LikeProperty, this=table, expressions=options) 2169 2170 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2171 return self.expression( 2172 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2173 ) 2174 2175 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2176 self._match(TokenType.EQ) 2177 return self.expression( 2178 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2179 ) 2180 2181 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2182 self._match_text_seq("WITH", "CONNECTION") 2183 return self.expression( 2184 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2185 ) 2186 2187 def _parse_returns(self) -> exp.ReturnsProperty: 2188 value: t.Optional[exp.Expression] 2189 is_table = self._match(TokenType.TABLE) 2190 2191 if is_table: 2192 if self._match(TokenType.LT): 2193 value = self.expression( 2194 exp.Schema, 2195 this="TABLE", 2196 expressions=self._parse_csv(self._parse_struct_types), 2197 ) 2198 if not self._match(TokenType.GT): 2199 self.raise_error("Expecting >") 2200 else: 2201 value = self._parse_schema(exp.var("TABLE")) 2202 else: 2203 value = self._parse_types() 2204 2205 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2206 2207 def _parse_describe(self) -> exp.Describe: 2208 kind = self._match_set(self.CREATABLES) and self._prev.text 2209 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2210 if not self._match_set(self.ID_VAR_TOKENS, advance=False): 2211 style = None 2212 self._retreat(self._index - 1) 2213 this = self._parse_table(schema=True) 2214 properties = self._parse_properties() 2215 expressions = properties.expressions if properties else None 2216 return self.expression( 2217 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2218 ) 2219 2220 def _parse_insert(self) -> exp.Insert: 2221 comments = ensure_list(self._prev_comments) 2222 hint = self._parse_hint() 2223 overwrite = self._match(TokenType.OVERWRITE) 2224 ignore = self._match(TokenType.IGNORE) 2225 local = self._match_text_seq("LOCAL") 2226 alternative = None 2227 is_function = None 2228 2229 if self._match_text_seq("DIRECTORY"): 2230 this: t.Optional[exp.Expression] = self.expression( 2231 exp.Directory, 2232 this=self._parse_var_or_string(), 2233 local=local, 2234 row_format=self._parse_row_format(match_row=True), 2235 ) 2236 else: 2237 if self._match(TokenType.OR): 2238 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2239 2240 self._match(TokenType.INTO) 2241 comments += ensure_list(self._prev_comments) 2242 self._match(TokenType.TABLE) 2243 is_function = self._match(TokenType.FUNCTION) 2244 2245 this = ( 2246 self._parse_table(schema=True, parse_partition=True) 2247 if not is_function 2248 else self._parse_function() 2249 ) 2250 2251 returning = self._parse_returning() 2252 2253 return self.expression( 2254 exp.Insert, 2255 comments=comments, 2256 hint=hint, 2257 is_function=is_function, 2258 this=this, 2259 stored=self._match_text_seq("STORED") and self._parse_stored(), 2260 by_name=self._match_text_seq("BY", "NAME"), 2261 exists=self._parse_exists(), 2262 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2263 and self._parse_conjunction(), 2264 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2265 conflict=self._parse_on_conflict(), 2266 returning=returning or self._parse_returning(), 2267 overwrite=overwrite, 2268 alternative=alternative, 2269 ignore=ignore, 2270 ) 2271 2272 def _parse_kill(self) -> exp.Kill: 2273 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2274 2275 return self.expression( 2276 exp.Kill, 2277 this=self._parse_primary(), 2278 kind=kind, 2279 ) 2280 2281 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2282 conflict = self._match_text_seq("ON", "CONFLICT") 2283 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2284 2285 if not conflict and not duplicate: 2286 return None 2287 2288 conflict_keys = None 2289 constraint = None 2290 2291 if conflict: 2292 if self._match_text_seq("ON", "CONSTRAINT"): 2293 constraint = self._parse_id_var() 2294 elif self._match(TokenType.L_PAREN): 2295 conflict_keys = self._parse_csv(self._parse_id_var) 2296 self._match_r_paren() 2297 2298 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2299 if self._prev.token_type == TokenType.UPDATE: 2300 self._match(TokenType.SET) 2301 expressions = self._parse_csv(self._parse_equality) 2302 else: 2303 expressions = None 2304 2305 return self.expression( 2306 exp.OnConflict, 2307 duplicate=duplicate, 2308 expressions=expressions, 2309 action=action, 2310 conflict_keys=conflict_keys, 2311 constraint=constraint, 2312 ) 2313 2314 def _parse_returning(self) -> t.Optional[exp.Returning]: 2315 if not self._match(TokenType.RETURNING): 2316 return None 2317 return self.expression( 2318 exp.Returning, 2319 expressions=self._parse_csv(self._parse_expression), 2320 into=self._match(TokenType.INTO) and self._parse_table_part(), 2321 ) 2322 2323 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2324 if not self._match(TokenType.FORMAT): 2325 return None 2326 return self._parse_row_format() 2327 2328 def _parse_row_format( 2329 self, match_row: bool = False 2330 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2331 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2332 return None 2333 2334 if self._match_text_seq("SERDE"): 2335 this = self._parse_string() 2336 2337 serde_properties = None 2338 if self._match(TokenType.SERDE_PROPERTIES): 2339 serde_properties = self.expression( 2340 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2341 ) 2342 2343 return self.expression( 2344 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2345 ) 2346 2347 self._match_text_seq("DELIMITED") 2348 2349 kwargs = {} 2350 2351 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2352 kwargs["fields"] = self._parse_string() 2353 if self._match_text_seq("ESCAPED", "BY"): 2354 kwargs["escaped"] = self._parse_string() 2355 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2356 kwargs["collection_items"] = self._parse_string() 2357 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2358 kwargs["map_keys"] = self._parse_string() 2359 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2360 kwargs["lines"] = self._parse_string() 2361 if self._match_text_seq("NULL", "DEFINED", "AS"): 2362 kwargs["null"] = self._parse_string() 2363 2364 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2365 2366 def _parse_load(self) -> exp.LoadData | exp.Command: 2367 if self._match_text_seq("DATA"): 2368 local = self._match_text_seq("LOCAL") 2369 self._match_text_seq("INPATH") 2370 inpath = self._parse_string() 2371 overwrite = self._match(TokenType.OVERWRITE) 2372 self._match_pair(TokenType.INTO, TokenType.TABLE) 2373 2374 return self.expression( 2375 exp.LoadData, 2376 this=self._parse_table(schema=True), 2377 local=local, 2378 overwrite=overwrite, 2379 inpath=inpath, 2380 partition=self._parse_partition(), 2381 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2382 serde=self._match_text_seq("SERDE") and self._parse_string(), 2383 ) 2384 return self._parse_as_command(self._prev) 2385 2386 def _parse_delete(self) -> exp.Delete: 2387 # This handles MySQL's "Multiple-Table Syntax" 2388 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2389 tables = None 2390 comments = self._prev_comments 2391 if not self._match(TokenType.FROM, advance=False): 2392 tables = self._parse_csv(self._parse_table) or None 2393 2394 returning = self._parse_returning() 2395 2396 return self.expression( 2397 exp.Delete, 2398 comments=comments, 2399 tables=tables, 2400 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2401 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2402 where=self._parse_where(), 2403 returning=returning or self._parse_returning(), 2404 limit=self._parse_limit(), 2405 ) 2406 2407 def _parse_update(self) -> exp.Update: 2408 comments = self._prev_comments 2409 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2410 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2411 returning = self._parse_returning() 2412 return self.expression( 2413 exp.Update, 2414 comments=comments, 2415 **{ # type: ignore 2416 "this": this, 2417 "expressions": expressions, 2418 "from": self._parse_from(joins=True), 2419 "where": self._parse_where(), 2420 "returning": returning or self._parse_returning(), 2421 "order": self._parse_order(), 2422 "limit": self._parse_limit(), 2423 }, 2424 ) 2425 2426 def _parse_uncache(self) -> exp.Uncache: 2427 if not self._match(TokenType.TABLE): 2428 self.raise_error("Expecting TABLE after UNCACHE") 2429 2430 return self.expression( 2431 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2432 ) 2433 2434 def _parse_cache(self) -> exp.Cache: 2435 lazy = self._match_text_seq("LAZY") 2436 self._match(TokenType.TABLE) 2437 table = self._parse_table(schema=True) 2438 2439 options = [] 2440 if self._match_text_seq("OPTIONS"): 2441 self._match_l_paren() 2442 k = self._parse_string() 2443 self._match(TokenType.EQ) 2444 v = self._parse_string() 2445 options = [k, v] 2446 self._match_r_paren() 2447 2448 self._match(TokenType.ALIAS) 2449 return self.expression( 2450 exp.Cache, 2451 this=table, 2452 lazy=lazy, 2453 options=options, 2454 expression=self._parse_select(nested=True), 2455 ) 2456 2457 def _parse_partition(self) -> t.Optional[exp.Partition]: 2458 if not self._match(TokenType.PARTITION): 2459 return None 2460 2461 return self.expression( 2462 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2463 ) 2464 2465 def _parse_value(self) -> exp.Tuple: 2466 if self._match(TokenType.L_PAREN): 2467 expressions = self._parse_csv(self._parse_expression) 2468 self._match_r_paren() 2469 return self.expression(exp.Tuple, expressions=expressions) 2470 2471 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2472 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2473 2474 def _parse_projections(self) -> t.List[exp.Expression]: 2475 return self._parse_expressions() 2476 2477 def _parse_select( 2478 self, 2479 nested: bool = False, 2480 table: bool = False, 2481 parse_subquery_alias: bool = True, 2482 parse_set_operation: bool = True, 2483 ) -> t.Optional[exp.Expression]: 2484 cte = self._parse_with() 2485 2486 if cte: 2487 this = self._parse_statement() 2488 2489 if not this: 2490 self.raise_error("Failed to parse any statement following CTE") 2491 return cte 2492 2493 if "with" in this.arg_types: 2494 this.set("with", cte) 2495 else: 2496 self.raise_error(f"{this.key} does not support CTE") 2497 this = cte 2498 2499 return this 2500 2501 # duckdb supports leading with FROM x 2502 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2503 2504 if self._match(TokenType.SELECT): 2505 comments = self._prev_comments 2506 2507 hint = self._parse_hint() 2508 all_ = self._match(TokenType.ALL) 2509 distinct = self._match_set(self.DISTINCT_TOKENS) 2510 2511 kind = ( 2512 self._match(TokenType.ALIAS) 2513 and self._match_texts(("STRUCT", "VALUE")) 2514 and self._prev.text.upper() 2515 ) 2516 2517 if distinct: 2518 distinct = self.expression( 2519 exp.Distinct, 2520 on=self._parse_value() if self._match(TokenType.ON) else None, 2521 ) 2522 2523 if all_ and distinct: 2524 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2525 2526 limit = self._parse_limit(top=True) 2527 projections = self._parse_projections() 2528 2529 this = self.expression( 2530 exp.Select, 2531 kind=kind, 2532 hint=hint, 2533 distinct=distinct, 2534 expressions=projections, 2535 limit=limit, 2536 ) 2537 this.comments = comments 2538 2539 into = self._parse_into() 2540 if into: 2541 this.set("into", into) 2542 2543 if not from_: 2544 from_ = self._parse_from() 2545 2546 if from_: 2547 this.set("from", from_) 2548 2549 this = self._parse_query_modifiers(this) 2550 elif (table or nested) and self._match(TokenType.L_PAREN): 2551 if self._match(TokenType.PIVOT): 2552 this = self._parse_simplified_pivot() 2553 elif self._match(TokenType.FROM): 2554 this = exp.select("*").from_( 2555 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2556 ) 2557 else: 2558 this = ( 2559 self._parse_table() 2560 if table 2561 else self._parse_select(nested=True, parse_set_operation=False) 2562 ) 2563 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2564 2565 self._match_r_paren() 2566 2567 # We return early here so that the UNION isn't attached to the subquery by the 2568 # following call to _parse_set_operations, but instead becomes the parent node 2569 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2570 elif self._match(TokenType.VALUES, advance=False): 2571 this = self._parse_derived_table_values() 2572 elif from_: 2573 this = exp.select("*").from_(from_.this, copy=False) 2574 else: 2575 this = None 2576 2577 if parse_set_operation: 2578 return self._parse_set_operations(this) 2579 return this 2580 2581 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2582 if not skip_with_token and not self._match(TokenType.WITH): 2583 return None 2584 2585 comments = self._prev_comments 2586 recursive = self._match(TokenType.RECURSIVE) 2587 2588 expressions = [] 2589 while True: 2590 expressions.append(self._parse_cte()) 2591 2592 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2593 break 2594 else: 2595 self._match(TokenType.WITH) 2596 2597 return self.expression( 2598 exp.With, comments=comments, expressions=expressions, recursive=recursive 2599 ) 2600 2601 def _parse_cte(self) -> exp.CTE: 2602 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2603 if not alias or not alias.this: 2604 self.raise_error("Expected CTE to have alias") 2605 2606 self._match(TokenType.ALIAS) 2607 2608 if self._match_text_seq("NOT", "MATERIALIZED"): 2609 materialized = False 2610 elif self._match_text_seq("MATERIALIZED"): 2611 materialized = True 2612 else: 2613 materialized = None 2614 2615 return self.expression( 2616 exp.CTE, 2617 this=self._parse_wrapped(self._parse_statement), 2618 alias=alias, 2619 materialized=materialized, 2620 ) 2621 2622 def _parse_table_alias( 2623 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2624 ) -> t.Optional[exp.TableAlias]: 2625 any_token = self._match(TokenType.ALIAS) 2626 alias = ( 2627 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2628 or self._parse_string_as_identifier() 2629 ) 2630 2631 index = self._index 2632 if self._match(TokenType.L_PAREN): 2633 columns = self._parse_csv(self._parse_function_parameter) 2634 self._match_r_paren() if columns else self._retreat(index) 2635 else: 2636 columns = None 2637 2638 if not alias and not columns: 2639 return None 2640 2641 return self.expression(exp.TableAlias, this=alias, columns=columns) 2642 2643 def _parse_subquery( 2644 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2645 ) -> t.Optional[exp.Subquery]: 2646 if not this: 2647 return None 2648 2649 return self.expression( 2650 exp.Subquery, 2651 this=this, 2652 pivots=self._parse_pivots(), 2653 alias=self._parse_table_alias() if parse_alias else None, 2654 ) 2655 2656 def _implicit_unnests_to_explicit(self, this: E) -> E: 2657 from sqlglot.optimizer.normalize_identifiers import ( 2658 normalize_identifiers as _norm, 2659 ) 2660 2661 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2662 for i, join in enumerate(this.args.get("joins") or []): 2663 table = join.this 2664 normalized_table = table.copy() 2665 normalized_table.meta["maybe_column"] = True 2666 normalized_table = _norm(normalized_table, dialect=self.dialect) 2667 2668 if isinstance(table, exp.Table) and not join.args.get("on"): 2669 if normalized_table.parts[0].name in refs: 2670 table_as_column = table.to_column() 2671 unnest = exp.Unnest(expressions=[table_as_column]) 2672 2673 # Table.to_column creates a parent Alias node that we want to convert to 2674 # a TableAlias and attach to the Unnest, so it matches the parser's output 2675 if isinstance(table.args.get("alias"), exp.TableAlias): 2676 table_as_column.replace(table_as_column.this) 2677 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2678 2679 table.replace(unnest) 2680 2681 refs.add(normalized_table.alias_or_name) 2682 2683 return this 2684 2685 def _parse_query_modifiers( 2686 self, this: t.Optional[exp.Expression] 2687 ) -> t.Optional[exp.Expression]: 2688 if isinstance(this, (exp.Query, exp.Table)): 2689 for join in self._parse_joins(): 2690 this.append("joins", join) 2691 for lateral in iter(self._parse_lateral, None): 2692 this.append("laterals", lateral) 2693 2694 while True: 2695 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2696 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2697 key, expression = parser(self) 2698 2699 if expression: 2700 this.set(key, expression) 2701 if key == "limit": 2702 offset = expression.args.pop("offset", None) 2703 2704 if offset: 2705 offset = exp.Offset(expression=offset) 2706 this.set("offset", offset) 2707 2708 limit_by_expressions = expression.expressions 2709 expression.set("expressions", None) 2710 offset.set("expressions", limit_by_expressions) 2711 continue 2712 break 2713 2714 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2715 this = self._implicit_unnests_to_explicit(this) 2716 2717 return this 2718 2719 def _parse_hint(self) -> t.Optional[exp.Hint]: 2720 if self._match(TokenType.HINT): 2721 hints = [] 2722 for hint in iter( 2723 lambda: self._parse_csv( 2724 lambda: self._parse_function() or self._parse_var(upper=True) 2725 ), 2726 [], 2727 ): 2728 hints.extend(hint) 2729 2730 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2731 self.raise_error("Expected */ after HINT") 2732 2733 return self.expression(exp.Hint, expressions=hints) 2734 2735 return None 2736 2737 def _parse_into(self) -> t.Optional[exp.Into]: 2738 if not self._match(TokenType.INTO): 2739 return None 2740 2741 temp = self._match(TokenType.TEMPORARY) 2742 unlogged = self._match_text_seq("UNLOGGED") 2743 self._match(TokenType.TABLE) 2744 2745 return self.expression( 2746 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2747 ) 2748 2749 def _parse_from( 2750 self, joins: bool = False, skip_from_token: bool = False 2751 ) -> t.Optional[exp.From]: 2752 if not skip_from_token and not self._match(TokenType.FROM): 2753 return None 2754 2755 return self.expression( 2756 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2757 ) 2758 2759 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2760 return self.expression( 2761 exp.MatchRecognizeMeasure, 2762 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2763 this=self._parse_expression(), 2764 ) 2765 2766 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2767 if not self._match(TokenType.MATCH_RECOGNIZE): 2768 return None 2769 2770 self._match_l_paren() 2771 2772 partition = self._parse_partition_by() 2773 order = self._parse_order() 2774 2775 measures = ( 2776 self._parse_csv(self._parse_match_recognize_measure) 2777 if self._match_text_seq("MEASURES") 2778 else None 2779 ) 2780 2781 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2782 rows = exp.var("ONE ROW PER MATCH") 2783 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2784 text = "ALL ROWS PER MATCH" 2785 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2786 text += " SHOW EMPTY MATCHES" 2787 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2788 text += " OMIT EMPTY MATCHES" 2789 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2790 text += " WITH UNMATCHED ROWS" 2791 rows = exp.var(text) 2792 else: 2793 rows = None 2794 2795 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2796 text = "AFTER MATCH SKIP" 2797 if self._match_text_seq("PAST", "LAST", "ROW"): 2798 text += " PAST LAST ROW" 2799 elif self._match_text_seq("TO", "NEXT", "ROW"): 2800 text += " TO NEXT ROW" 2801 elif self._match_text_seq("TO", "FIRST"): 2802 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2803 elif self._match_text_seq("TO", "LAST"): 2804 text += f" TO LAST {self._advance_any().text}" # type: ignore 2805 after = exp.var(text) 2806 else: 2807 after = None 2808 2809 if self._match_text_seq("PATTERN"): 2810 self._match_l_paren() 2811 2812 if not self._curr: 2813 self.raise_error("Expecting )", self._curr) 2814 2815 paren = 1 2816 start = self._curr 2817 2818 while self._curr and paren > 0: 2819 if self._curr.token_type == TokenType.L_PAREN: 2820 paren += 1 2821 if self._curr.token_type == TokenType.R_PAREN: 2822 paren -= 1 2823 2824 end = self._prev 2825 self._advance() 2826 2827 if paren > 0: 2828 self.raise_error("Expecting )", self._curr) 2829 2830 pattern = exp.var(self._find_sql(start, end)) 2831 else: 2832 pattern = None 2833 2834 define = ( 2835 self._parse_csv(self._parse_name_as_expression) 2836 if self._match_text_seq("DEFINE") 2837 else None 2838 ) 2839 2840 self._match_r_paren() 2841 2842 return self.expression( 2843 exp.MatchRecognize, 2844 partition_by=partition, 2845 order=order, 2846 measures=measures, 2847 rows=rows, 2848 after=after, 2849 pattern=pattern, 2850 define=define, 2851 alias=self._parse_table_alias(), 2852 ) 2853 2854 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2855 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2856 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2857 cross_apply = False 2858 2859 if cross_apply is not None: 2860 this = self._parse_select(table=True) 2861 view = None 2862 outer = None 2863 elif self._match(TokenType.LATERAL): 2864 this = self._parse_select(table=True) 2865 view = self._match(TokenType.VIEW) 2866 outer = self._match(TokenType.OUTER) 2867 else: 2868 return None 2869 2870 if not this: 2871 this = ( 2872 self._parse_unnest() 2873 or self._parse_function() 2874 or self._parse_id_var(any_token=False) 2875 ) 2876 2877 while self._match(TokenType.DOT): 2878 this = exp.Dot( 2879 this=this, 2880 expression=self._parse_function() or self._parse_id_var(any_token=False), 2881 ) 2882 2883 if view: 2884 table = self._parse_id_var(any_token=False) 2885 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2886 table_alias: t.Optional[exp.TableAlias] = self.expression( 2887 exp.TableAlias, this=table, columns=columns 2888 ) 2889 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2890 # We move the alias from the lateral's child node to the lateral itself 2891 table_alias = this.args["alias"].pop() 2892 else: 2893 table_alias = self._parse_table_alias() 2894 2895 return self.expression( 2896 exp.Lateral, 2897 this=this, 2898 view=view, 2899 outer=outer, 2900 alias=table_alias, 2901 cross_apply=cross_apply, 2902 ) 2903 2904 def _parse_join_parts( 2905 self, 2906 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2907 return ( 2908 self._match_set(self.JOIN_METHODS) and self._prev, 2909 self._match_set(self.JOIN_SIDES) and self._prev, 2910 self._match_set(self.JOIN_KINDS) and self._prev, 2911 ) 2912 2913 def _parse_join( 2914 self, skip_join_token: bool = False, parse_bracket: bool = False 2915 ) -> t.Optional[exp.Join]: 2916 if self._match(TokenType.COMMA): 2917 return self.expression(exp.Join, this=self._parse_table()) 2918 2919 index = self._index 2920 method, side, kind = self._parse_join_parts() 2921 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2922 join = self._match(TokenType.JOIN) 2923 2924 if not skip_join_token and not join: 2925 self._retreat(index) 2926 kind = None 2927 method = None 2928 side = None 2929 2930 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2931 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2932 2933 if not skip_join_token and not join and not outer_apply and not cross_apply: 2934 return None 2935 2936 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2937 2938 if method: 2939 kwargs["method"] = method.text 2940 if side: 2941 kwargs["side"] = side.text 2942 if kind: 2943 kwargs["kind"] = kind.text 2944 if hint: 2945 kwargs["hint"] = hint 2946 2947 if self._match(TokenType.MATCH_CONDITION): 2948 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2949 2950 if self._match(TokenType.ON): 2951 kwargs["on"] = self._parse_conjunction() 2952 elif self._match(TokenType.USING): 2953 kwargs["using"] = self._parse_wrapped_id_vars() 2954 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2955 kind and kind.token_type == TokenType.CROSS 2956 ): 2957 index = self._index 2958 joins: t.Optional[list] = list(self._parse_joins()) 2959 2960 if joins and self._match(TokenType.ON): 2961 kwargs["on"] = self._parse_conjunction() 2962 elif joins and self._match(TokenType.USING): 2963 kwargs["using"] = self._parse_wrapped_id_vars() 2964 else: 2965 joins = None 2966 self._retreat(index) 2967 2968 kwargs["this"].set("joins", joins if joins else None) 2969 2970 comments = [c for token in (method, side, kind) if token for c in token.comments] 2971 return self.expression(exp.Join, comments=comments, **kwargs) 2972 2973 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2974 this = self._parse_conjunction() 2975 2976 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2977 return this 2978 2979 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2980 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2981 2982 return this 2983 2984 def _parse_index_params(self) -> exp.IndexParameters: 2985 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2986 2987 if self._match(TokenType.L_PAREN, advance=False): 2988 columns = self._parse_wrapped_csv(self._parse_with_operator) 2989 else: 2990 columns = None 2991 2992 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2993 partition_by = self._parse_partition_by() 2994 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2995 tablespace = ( 2996 self._parse_var(any_token=True) 2997 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2998 else None 2999 ) 3000 where = self._parse_where() 3001 3002 return self.expression( 3003 exp.IndexParameters, 3004 using=using, 3005 columns=columns, 3006 include=include, 3007 partition_by=partition_by, 3008 where=where, 3009 with_storage=with_storage, 3010 tablespace=tablespace, 3011 ) 3012 3013 def _parse_index( 3014 self, 3015 index: t.Optional[exp.Expression] = None, 3016 ) -> t.Optional[exp.Index]: 3017 if index: 3018 unique = None 3019 primary = None 3020 amp = None 3021 3022 self._match(TokenType.ON) 3023 self._match(TokenType.TABLE) # hive 3024 table = self._parse_table_parts(schema=True) 3025 else: 3026 unique = self._match(TokenType.UNIQUE) 3027 primary = self._match_text_seq("PRIMARY") 3028 amp = self._match_text_seq("AMP") 3029 3030 if not self._match(TokenType.INDEX): 3031 return None 3032 3033 index = self._parse_id_var() 3034 table = None 3035 3036 params = self._parse_index_params() 3037 3038 return self.expression( 3039 exp.Index, 3040 this=index, 3041 table=table, 3042 unique=unique, 3043 primary=primary, 3044 amp=amp, 3045 params=params, 3046 ) 3047 3048 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3049 hints: t.List[exp.Expression] = [] 3050 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3051 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3052 hints.append( 3053 self.expression( 3054 exp.WithTableHint, 3055 expressions=self._parse_csv( 3056 lambda: self._parse_function() or self._parse_var(any_token=True) 3057 ), 3058 ) 3059 ) 3060 self._match_r_paren() 3061 else: 3062 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3063 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3064 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3065 3066 self._match_texts(("INDEX", "KEY")) 3067 if self._match(TokenType.FOR): 3068 hint.set("target", self._advance_any() and self._prev.text.upper()) 3069 3070 hint.set("expressions", self._parse_wrapped_id_vars()) 3071 hints.append(hint) 3072 3073 return hints or None 3074 3075 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3076 return ( 3077 (not schema and self._parse_function(optional_parens=False)) 3078 or self._parse_id_var(any_token=False) 3079 or self._parse_string_as_identifier() 3080 or self._parse_placeholder() 3081 ) 3082 3083 def _parse_table_parts( 3084 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3085 ) -> exp.Table: 3086 catalog = None 3087 db = None 3088 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3089 3090 while self._match(TokenType.DOT): 3091 if catalog: 3092 # This allows nesting the table in arbitrarily many dot expressions if needed 3093 table = self.expression( 3094 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3095 ) 3096 else: 3097 catalog = db 3098 db = table 3099 # "" used for tsql FROM a..b case 3100 table = self._parse_table_part(schema=schema) or "" 3101 3102 if ( 3103 wildcard 3104 and self._is_connected() 3105 and (isinstance(table, exp.Identifier) or not table) 3106 and self._match(TokenType.STAR) 3107 ): 3108 if isinstance(table, exp.Identifier): 3109 table.args["this"] += "*" 3110 else: 3111 table = exp.Identifier(this="*") 3112 3113 if is_db_reference: 3114 catalog = db 3115 db = table 3116 table = None 3117 3118 if not table and not is_db_reference: 3119 self.raise_error(f"Expected table name but got {self._curr}") 3120 if not db and is_db_reference: 3121 self.raise_error(f"Expected database name but got {self._curr}") 3122 3123 return self.expression( 3124 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3125 ) 3126 3127 def _parse_table( 3128 self, 3129 schema: bool = False, 3130 joins: bool = False, 3131 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3132 parse_bracket: bool = False, 3133 is_db_reference: bool = False, 3134 parse_partition: bool = False, 3135 ) -> t.Optional[exp.Expression]: 3136 lateral = self._parse_lateral() 3137 if lateral: 3138 return lateral 3139 3140 unnest = self._parse_unnest() 3141 if unnest: 3142 return unnest 3143 3144 values = self._parse_derived_table_values() 3145 if values: 3146 return values 3147 3148 subquery = self._parse_select(table=True) 3149 if subquery: 3150 if not subquery.args.get("pivots"): 3151 subquery.set("pivots", self._parse_pivots()) 3152 return subquery 3153 3154 bracket = parse_bracket and self._parse_bracket(None) 3155 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3156 3157 only = self._match(TokenType.ONLY) 3158 3159 this = t.cast( 3160 exp.Expression, 3161 bracket 3162 or self._parse_bracket( 3163 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3164 ), 3165 ) 3166 3167 if only: 3168 this.set("only", only) 3169 3170 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3171 self._match_text_seq("*") 3172 3173 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3174 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3175 this.set("partition", self._parse_partition()) 3176 3177 if schema: 3178 return self._parse_schema(this=this) 3179 3180 version = self._parse_version() 3181 3182 if version: 3183 this.set("version", version) 3184 3185 if self.dialect.ALIAS_POST_TABLESAMPLE: 3186 table_sample = self._parse_table_sample() 3187 3188 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3189 if alias: 3190 this.set("alias", alias) 3191 3192 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3193 return self.expression( 3194 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3195 ) 3196 3197 this.set("hints", self._parse_table_hints()) 3198 3199 if not this.args.get("pivots"): 3200 this.set("pivots", self._parse_pivots()) 3201 3202 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3203 table_sample = self._parse_table_sample() 3204 3205 if table_sample: 3206 table_sample.set("this", this) 3207 this = table_sample 3208 3209 if joins: 3210 for join in self._parse_joins(): 3211 this.append("joins", join) 3212 3213 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3214 this.set("ordinality", True) 3215 this.set("alias", self._parse_table_alias()) 3216 3217 return this 3218 3219 def _parse_version(self) -> t.Optional[exp.Version]: 3220 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3221 this = "TIMESTAMP" 3222 elif self._match(TokenType.VERSION_SNAPSHOT): 3223 this = "VERSION" 3224 else: 3225 return None 3226 3227 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3228 kind = self._prev.text.upper() 3229 start = self._parse_bitwise() 3230 self._match_texts(("TO", "AND")) 3231 end = self._parse_bitwise() 3232 expression: t.Optional[exp.Expression] = self.expression( 3233 exp.Tuple, expressions=[start, end] 3234 ) 3235 elif self._match_text_seq("CONTAINED", "IN"): 3236 kind = "CONTAINED IN" 3237 expression = self.expression( 3238 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3239 ) 3240 elif self._match(TokenType.ALL): 3241 kind = "ALL" 3242 expression = None 3243 else: 3244 self._match_text_seq("AS", "OF") 3245 kind = "AS OF" 3246 expression = self._parse_type() 3247 3248 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3249 3250 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3251 if not self._match(TokenType.UNNEST): 3252 return None 3253 3254 expressions = self._parse_wrapped_csv(self._parse_equality) 3255 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3256 3257 alias = self._parse_table_alias() if with_alias else None 3258 3259 if alias: 3260 if self.dialect.UNNEST_COLUMN_ONLY: 3261 if alias.args.get("columns"): 3262 self.raise_error("Unexpected extra column alias in unnest.") 3263 3264 alias.set("columns", [alias.this]) 3265 alias.set("this", None) 3266 3267 columns = alias.args.get("columns") or [] 3268 if offset and len(expressions) < len(columns): 3269 offset = columns.pop() 3270 3271 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3272 self._match(TokenType.ALIAS) 3273 offset = self._parse_id_var( 3274 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3275 ) or exp.to_identifier("offset") 3276 3277 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3278 3279 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3280 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3281 if not is_derived and not self._match_text_seq("VALUES"): 3282 return None 3283 3284 expressions = self._parse_csv(self._parse_value) 3285 alias = self._parse_table_alias() 3286 3287 if is_derived: 3288 self._match_r_paren() 3289 3290 return self.expression( 3291 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3292 ) 3293 3294 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3295 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3296 as_modifier and self._match_text_seq("USING", "SAMPLE") 3297 ): 3298 return None 3299 3300 bucket_numerator = None 3301 bucket_denominator = None 3302 bucket_field = None 3303 percent = None 3304 size = None 3305 seed = None 3306 3307 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3308 matched_l_paren = self._match(TokenType.L_PAREN) 3309 3310 if self.TABLESAMPLE_CSV: 3311 num = None 3312 expressions = self._parse_csv(self._parse_primary) 3313 else: 3314 expressions = None 3315 num = ( 3316 self._parse_factor() 3317 if self._match(TokenType.NUMBER, advance=False) 3318 else self._parse_primary() or self._parse_placeholder() 3319 ) 3320 3321 if self._match_text_seq("BUCKET"): 3322 bucket_numerator = self._parse_number() 3323 self._match_text_seq("OUT", "OF") 3324 bucket_denominator = bucket_denominator = self._parse_number() 3325 self._match(TokenType.ON) 3326 bucket_field = self._parse_field() 3327 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3328 percent = num 3329 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3330 size = num 3331 else: 3332 percent = num 3333 3334 if matched_l_paren: 3335 self._match_r_paren() 3336 3337 if self._match(TokenType.L_PAREN): 3338 method = self._parse_var(upper=True) 3339 seed = self._match(TokenType.COMMA) and self._parse_number() 3340 self._match_r_paren() 3341 elif self._match_texts(("SEED", "REPEATABLE")): 3342 seed = self._parse_wrapped(self._parse_number) 3343 3344 return self.expression( 3345 exp.TableSample, 3346 expressions=expressions, 3347 method=method, 3348 bucket_numerator=bucket_numerator, 3349 bucket_denominator=bucket_denominator, 3350 bucket_field=bucket_field, 3351 percent=percent, 3352 size=size, 3353 seed=seed, 3354 ) 3355 3356 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3357 return list(iter(self._parse_pivot, None)) or None 3358 3359 def _parse_joins(self) -> t.Iterator[exp.Join]: 3360 return iter(self._parse_join, None) 3361 3362 # https://duckdb.org/docs/sql/statements/pivot 3363 def _parse_simplified_pivot(self) -> exp.Pivot: 3364 def _parse_on() -> t.Optional[exp.Expression]: 3365 this = self._parse_bitwise() 3366 return self._parse_in(this) if self._match(TokenType.IN) else this 3367 3368 this = self._parse_table() 3369 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3370 using = self._match(TokenType.USING) and self._parse_csv( 3371 lambda: self._parse_alias(self._parse_function()) 3372 ) 3373 group = self._parse_group() 3374 return self.expression( 3375 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3376 ) 3377 3378 def _parse_pivot_in(self) -> exp.In: 3379 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3380 this = self._parse_conjunction() 3381 3382 self._match(TokenType.ALIAS) 3383 alias = self._parse_field() 3384 if alias: 3385 return self.expression(exp.PivotAlias, this=this, alias=alias) 3386 3387 return this 3388 3389 value = self._parse_column() 3390 3391 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3392 self.raise_error("Expecting IN (") 3393 3394 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3395 3396 self._match_r_paren() 3397 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3398 3399 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3400 index = self._index 3401 include_nulls = None 3402 3403 if self._match(TokenType.PIVOT): 3404 unpivot = False 3405 elif self._match(TokenType.UNPIVOT): 3406 unpivot = True 3407 3408 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3409 if self._match_text_seq("INCLUDE", "NULLS"): 3410 include_nulls = True 3411 elif self._match_text_seq("EXCLUDE", "NULLS"): 3412 include_nulls = False 3413 else: 3414 return None 3415 3416 expressions = [] 3417 3418 if not self._match(TokenType.L_PAREN): 3419 self._retreat(index) 3420 return None 3421 3422 if unpivot: 3423 expressions = self._parse_csv(self._parse_column) 3424 else: 3425 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3426 3427 if not expressions: 3428 self.raise_error("Failed to parse PIVOT's aggregation list") 3429 3430 if not self._match(TokenType.FOR): 3431 self.raise_error("Expecting FOR") 3432 3433 field = self._parse_pivot_in() 3434 3435 self._match_r_paren() 3436 3437 pivot = self.expression( 3438 exp.Pivot, 3439 expressions=expressions, 3440 field=field, 3441 unpivot=unpivot, 3442 include_nulls=include_nulls, 3443 ) 3444 3445 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3446 pivot.set("alias", self._parse_table_alias()) 3447 3448 if not unpivot: 3449 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3450 3451 columns: t.List[exp.Expression] = [] 3452 for fld in pivot.args["field"].expressions: 3453 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3454 for name in names: 3455 if self.PREFIXED_PIVOT_COLUMNS: 3456 name = f"{name}_{field_name}" if name else field_name 3457 else: 3458 name = f"{field_name}_{name}" if name else field_name 3459 3460 columns.append(exp.to_identifier(name)) 3461 3462 pivot.set("columns", columns) 3463 3464 return pivot 3465 3466 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3467 return [agg.alias for agg in aggregations] 3468 3469 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3470 if not skip_where_token and not self._match(TokenType.PREWHERE): 3471 return None 3472 3473 return self.expression( 3474 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3475 ) 3476 3477 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3478 if not skip_where_token and not self._match(TokenType.WHERE): 3479 return None 3480 3481 return self.expression( 3482 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3483 ) 3484 3485 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3486 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3487 return None 3488 3489 elements: t.Dict[str, t.Any] = defaultdict(list) 3490 3491 if self._match(TokenType.ALL): 3492 elements["all"] = True 3493 elif self._match(TokenType.DISTINCT): 3494 elements["all"] = False 3495 3496 while True: 3497 expressions = self._parse_csv(self._parse_conjunction) 3498 if expressions: 3499 elements["expressions"].extend(expressions) 3500 3501 grouping_sets = self._parse_grouping_sets() 3502 if grouping_sets: 3503 elements["grouping_sets"].extend(grouping_sets) 3504 3505 rollup = None 3506 cube = None 3507 totals = None 3508 3509 index = self._index 3510 with_ = self._match(TokenType.WITH) 3511 if self._match(TokenType.ROLLUP): 3512 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3513 elements["rollup"].extend(ensure_list(rollup)) 3514 3515 if self._match(TokenType.CUBE): 3516 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3517 elements["cube"].extend(ensure_list(cube)) 3518 3519 if self._match_text_seq("TOTALS"): 3520 totals = True 3521 elements["totals"] = True # type: ignore 3522 3523 if not (grouping_sets or rollup or cube or totals): 3524 if with_: 3525 self._retreat(index) 3526 break 3527 3528 return self.expression(exp.Group, **elements) # type: ignore 3529 3530 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3531 if not self._match(TokenType.GROUPING_SETS): 3532 return None 3533 3534 return self._parse_wrapped_csv(self._parse_grouping_set) 3535 3536 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3537 if self._match(TokenType.L_PAREN): 3538 grouping_set = self._parse_csv(self._parse_column) 3539 self._match_r_paren() 3540 return self.expression(exp.Tuple, expressions=grouping_set) 3541 3542 return self._parse_column() 3543 3544 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3545 if not skip_having_token and not self._match(TokenType.HAVING): 3546 return None 3547 return self.expression(exp.Having, this=self._parse_conjunction()) 3548 3549 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3550 if not self._match(TokenType.QUALIFY): 3551 return None 3552 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3553 3554 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3555 if skip_start_token: 3556 start = None 3557 elif self._match(TokenType.START_WITH): 3558 start = self._parse_conjunction() 3559 else: 3560 return None 3561 3562 self._match(TokenType.CONNECT_BY) 3563 nocycle = self._match_text_seq("NOCYCLE") 3564 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3565 exp.Prior, this=self._parse_bitwise() 3566 ) 3567 connect = self._parse_conjunction() 3568 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3569 3570 if not start and self._match(TokenType.START_WITH): 3571 start = self._parse_conjunction() 3572 3573 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3574 3575 def _parse_name_as_expression(self) -> exp.Alias: 3576 return self.expression( 3577 exp.Alias, 3578 alias=self._parse_id_var(any_token=True), 3579 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3580 ) 3581 3582 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3583 if self._match_text_seq("INTERPOLATE"): 3584 return self._parse_wrapped_csv(self._parse_name_as_expression) 3585 return None 3586 3587 def _parse_order( 3588 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3589 ) -> t.Optional[exp.Expression]: 3590 siblings = None 3591 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3592 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3593 return this 3594 3595 siblings = True 3596 3597 return self.expression( 3598 exp.Order, 3599 this=this, 3600 expressions=self._parse_csv(self._parse_ordered), 3601 interpolate=self._parse_interpolate(), 3602 siblings=siblings, 3603 ) 3604 3605 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3606 if not self._match(token): 3607 return None 3608 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3609 3610 def _parse_ordered( 3611 self, parse_method: t.Optional[t.Callable] = None 3612 ) -> t.Optional[exp.Ordered]: 3613 this = parse_method() if parse_method else self._parse_conjunction() 3614 if not this: 3615 return None 3616 3617 asc = self._match(TokenType.ASC) 3618 desc = self._match(TokenType.DESC) or (asc and False) 3619 3620 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3621 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3622 3623 nulls_first = is_nulls_first or False 3624 explicitly_null_ordered = is_nulls_first or is_nulls_last 3625 3626 if ( 3627 not explicitly_null_ordered 3628 and ( 3629 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3630 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3631 ) 3632 and self.dialect.NULL_ORDERING != "nulls_are_last" 3633 ): 3634 nulls_first = True 3635 3636 if self._match_text_seq("WITH", "FILL"): 3637 with_fill = self.expression( 3638 exp.WithFill, 3639 **{ # type: ignore 3640 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3641 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3642 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3643 }, 3644 ) 3645 else: 3646 with_fill = None 3647 3648 return self.expression( 3649 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3650 ) 3651 3652 def _parse_limit( 3653 self, 3654 this: t.Optional[exp.Expression] = None, 3655 top: bool = False, 3656 skip_limit_token: bool = False, 3657 ) -> t.Optional[exp.Expression]: 3658 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3659 comments = self._prev_comments 3660 if top: 3661 limit_paren = self._match(TokenType.L_PAREN) 3662 expression = self._parse_term() if limit_paren else self._parse_number() 3663 3664 if limit_paren: 3665 self._match_r_paren() 3666 else: 3667 expression = self._parse_term() 3668 3669 if self._match(TokenType.COMMA): 3670 offset = expression 3671 expression = self._parse_term() 3672 else: 3673 offset = None 3674 3675 limit_exp = self.expression( 3676 exp.Limit, 3677 this=this, 3678 expression=expression, 3679 offset=offset, 3680 comments=comments, 3681 expressions=self._parse_limit_by(), 3682 ) 3683 3684 return limit_exp 3685 3686 if self._match(TokenType.FETCH): 3687 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3688 direction = self._prev.text.upper() if direction else "FIRST" 3689 3690 count = self._parse_field(tokens=self.FETCH_TOKENS) 3691 percent = self._match(TokenType.PERCENT) 3692 3693 self._match_set((TokenType.ROW, TokenType.ROWS)) 3694 3695 only = self._match_text_seq("ONLY") 3696 with_ties = self._match_text_seq("WITH", "TIES") 3697 3698 if only and with_ties: 3699 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3700 3701 return self.expression( 3702 exp.Fetch, 3703 direction=direction, 3704 count=count, 3705 percent=percent, 3706 with_ties=with_ties, 3707 ) 3708 3709 return this 3710 3711 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3712 if not self._match(TokenType.OFFSET): 3713 return this 3714 3715 count = self._parse_term() 3716 self._match_set((TokenType.ROW, TokenType.ROWS)) 3717 3718 return self.expression( 3719 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3720 ) 3721 3722 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3723 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3724 3725 def _parse_locks(self) -> t.List[exp.Lock]: 3726 locks = [] 3727 while True: 3728 if self._match_text_seq("FOR", "UPDATE"): 3729 update = True 3730 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3731 "LOCK", "IN", "SHARE", "MODE" 3732 ): 3733 update = False 3734 else: 3735 break 3736 3737 expressions = None 3738 if self._match_text_seq("OF"): 3739 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3740 3741 wait: t.Optional[bool | exp.Expression] = None 3742 if self._match_text_seq("NOWAIT"): 3743 wait = True 3744 elif self._match_text_seq("WAIT"): 3745 wait = self._parse_primary() 3746 elif self._match_text_seq("SKIP", "LOCKED"): 3747 wait = False 3748 3749 locks.append( 3750 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3751 ) 3752 3753 return locks 3754 3755 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3756 while this and self._match_set(self.SET_OPERATIONS): 3757 token_type = self._prev.token_type 3758 3759 if token_type == TokenType.UNION: 3760 operation = exp.Union 3761 elif token_type == TokenType.EXCEPT: 3762 operation = exp.Except 3763 else: 3764 operation = exp.Intersect 3765 3766 comments = self._prev.comments 3767 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3768 by_name = self._match_text_seq("BY", "NAME") 3769 expression = self._parse_select(nested=True, parse_set_operation=False) 3770 3771 this = self.expression( 3772 operation, 3773 comments=comments, 3774 this=this, 3775 distinct=distinct, 3776 by_name=by_name, 3777 expression=expression, 3778 ) 3779 3780 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3781 expression = this.expression 3782 3783 if expression: 3784 for arg in self.UNION_MODIFIERS: 3785 expr = expression.args.get(arg) 3786 if expr: 3787 this.set(arg, expr.pop()) 3788 3789 return this 3790 3791 def _parse_expression(self) -> t.Optional[exp.Expression]: 3792 return self._parse_alias(self._parse_conjunction()) 3793 3794 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3795 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3796 3797 def _parse_equality(self) -> t.Optional[exp.Expression]: 3798 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3799 3800 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3801 return self._parse_tokens(self._parse_range, self.COMPARISON) 3802 3803 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3804 this = this or self._parse_bitwise() 3805 negate = self._match(TokenType.NOT) 3806 3807 if self._match_set(self.RANGE_PARSERS): 3808 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3809 if not expression: 3810 return this 3811 3812 this = expression 3813 elif self._match(TokenType.ISNULL): 3814 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3815 3816 # Postgres supports ISNULL and NOTNULL for conditions. 3817 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3818 if self._match(TokenType.NOTNULL): 3819 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3820 this = self.expression(exp.Not, this=this) 3821 3822 if negate: 3823 this = self.expression(exp.Not, this=this) 3824 3825 if self._match(TokenType.IS): 3826 this = self._parse_is(this) 3827 3828 return this 3829 3830 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3831 index = self._index - 1 3832 negate = self._match(TokenType.NOT) 3833 3834 if self._match_text_seq("DISTINCT", "FROM"): 3835 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3836 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3837 3838 expression = self._parse_null() or self._parse_boolean() 3839 if not expression: 3840 self._retreat(index) 3841 return None 3842 3843 this = self.expression(exp.Is, this=this, expression=expression) 3844 return self.expression(exp.Not, this=this) if negate else this 3845 3846 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3847 unnest = self._parse_unnest(with_alias=False) 3848 if unnest: 3849 this = self.expression(exp.In, this=this, unnest=unnest) 3850 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3851 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3852 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3853 3854 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3855 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3856 else: 3857 this = self.expression(exp.In, this=this, expressions=expressions) 3858 3859 if matched_l_paren: 3860 self._match_r_paren(this) 3861 elif not self._match(TokenType.R_BRACKET, expression=this): 3862 self.raise_error("Expecting ]") 3863 else: 3864 this = self.expression(exp.In, this=this, field=self._parse_field()) 3865 3866 return this 3867 3868 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3869 low = self._parse_bitwise() 3870 self._match(TokenType.AND) 3871 high = self._parse_bitwise() 3872 return self.expression(exp.Between, this=this, low=low, high=high) 3873 3874 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3875 if not self._match(TokenType.ESCAPE): 3876 return this 3877 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3878 3879 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3880 index = self._index 3881 3882 if not self._match(TokenType.INTERVAL) and match_interval: 3883 return None 3884 3885 if self._match(TokenType.STRING, advance=False): 3886 this = self._parse_primary() 3887 else: 3888 this = self._parse_term() 3889 3890 if not this or ( 3891 isinstance(this, exp.Column) 3892 and not this.table 3893 and not this.this.quoted 3894 and this.name.upper() == "IS" 3895 ): 3896 self._retreat(index) 3897 return None 3898 3899 unit = self._parse_function() or ( 3900 not self._match(TokenType.ALIAS, advance=False) 3901 and self._parse_var(any_token=True, upper=True) 3902 ) 3903 3904 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3905 # each INTERVAL expression into this canonical form so it's easy to transpile 3906 if this and this.is_number: 3907 this = exp.Literal.string(this.name) 3908 elif this and this.is_string: 3909 parts = this.name.split() 3910 3911 if len(parts) == 2: 3912 if unit: 3913 # This is not actually a unit, it's something else (e.g. a "window side") 3914 unit = None 3915 self._retreat(self._index - 1) 3916 3917 this = exp.Literal.string(parts[0]) 3918 unit = self.expression(exp.Var, this=parts[1].upper()) 3919 3920 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3921 unit = self.expression( 3922 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3923 ) 3924 3925 return self.expression(exp.Interval, this=this, unit=unit) 3926 3927 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3928 this = self._parse_term() 3929 3930 while True: 3931 if self._match_set(self.BITWISE): 3932 this = self.expression( 3933 self.BITWISE[self._prev.token_type], 3934 this=this, 3935 expression=self._parse_term(), 3936 ) 3937 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3938 this = self.expression( 3939 exp.DPipe, 3940 this=this, 3941 expression=self._parse_term(), 3942 safe=not self.dialect.STRICT_STRING_CONCAT, 3943 ) 3944 elif self._match(TokenType.DQMARK): 3945 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3946 elif self._match_pair(TokenType.LT, TokenType.LT): 3947 this = self.expression( 3948 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3949 ) 3950 elif self._match_pair(TokenType.GT, TokenType.GT): 3951 this = self.expression( 3952 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3953 ) 3954 else: 3955 break 3956 3957 return this 3958 3959 def _parse_term(self) -> t.Optional[exp.Expression]: 3960 return self._parse_tokens(self._parse_factor, self.TERM) 3961 3962 def _parse_factor(self) -> t.Optional[exp.Expression]: 3963 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3964 this = parse_method() 3965 3966 while self._match_set(self.FACTOR): 3967 this = self.expression( 3968 self.FACTOR[self._prev.token_type], 3969 this=this, 3970 comments=self._prev_comments, 3971 expression=parse_method(), 3972 ) 3973 if isinstance(this, exp.Div): 3974 this.args["typed"] = self.dialect.TYPED_DIVISION 3975 this.args["safe"] = self.dialect.SAFE_DIVISION 3976 3977 return this 3978 3979 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3980 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3981 3982 def _parse_unary(self) -> t.Optional[exp.Expression]: 3983 if self._match_set(self.UNARY_PARSERS): 3984 return self.UNARY_PARSERS[self._prev.token_type](self) 3985 return self._parse_at_time_zone(self._parse_type()) 3986 3987 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3988 interval = parse_interval and self._parse_interval() 3989 if interval: 3990 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3991 while True: 3992 index = self._index 3993 self._match(TokenType.PLUS) 3994 3995 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3996 self._retreat(index) 3997 break 3998 3999 interval = self.expression( # type: ignore 4000 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4001 ) 4002 4003 return interval 4004 4005 index = self._index 4006 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4007 this = self._parse_column() 4008 4009 if data_type: 4010 if isinstance(this, exp.Literal): 4011 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4012 if parser: 4013 return parser(self, this, data_type) 4014 return self.expression(exp.Cast, this=this, to=data_type) 4015 if not data_type.expressions: 4016 self._retreat(index) 4017 return self._parse_column() 4018 return self._parse_column_ops(data_type) 4019 4020 return this and self._parse_column_ops(this) 4021 4022 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4023 this = self._parse_type() 4024 if not this: 4025 return None 4026 4027 if isinstance(this, exp.Column) and not this.table: 4028 this = exp.var(this.name.upper()) 4029 4030 return self.expression( 4031 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4032 ) 4033 4034 def _parse_types( 4035 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4036 ) -> t.Optional[exp.Expression]: 4037 index = self._index 4038 4039 prefix = self._match_text_seq("SYSUDTLIB", ".") 4040 4041 if not self._match_set(self.TYPE_TOKENS): 4042 identifier = allow_identifiers and self._parse_id_var( 4043 any_token=False, tokens=(TokenType.VAR,) 4044 ) 4045 if identifier: 4046 tokens = self.dialect.tokenize(identifier.name) 4047 4048 if len(tokens) != 1: 4049 self.raise_error("Unexpected identifier", self._prev) 4050 4051 if tokens[0].token_type in self.TYPE_TOKENS: 4052 self._prev = tokens[0] 4053 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4054 type_name = identifier.name 4055 4056 while self._match(TokenType.DOT): 4057 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4058 4059 return exp.DataType.build(type_name, udt=True) 4060 else: 4061 self._retreat(self._index - 1) 4062 return None 4063 else: 4064 return None 4065 4066 type_token = self._prev.token_type 4067 4068 if type_token == TokenType.PSEUDO_TYPE: 4069 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4070 4071 if type_token == TokenType.OBJECT_IDENTIFIER: 4072 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4073 4074 nested = type_token in self.NESTED_TYPE_TOKENS 4075 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4076 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4077 expressions = None 4078 maybe_func = False 4079 4080 if self._match(TokenType.L_PAREN): 4081 if is_struct: 4082 expressions = self._parse_csv(self._parse_struct_types) 4083 elif nested: 4084 expressions = self._parse_csv( 4085 lambda: self._parse_types( 4086 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4087 ) 4088 ) 4089 elif type_token in self.ENUM_TYPE_TOKENS: 4090 expressions = self._parse_csv(self._parse_equality) 4091 elif is_aggregate: 4092 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4093 any_token=False, tokens=(TokenType.VAR,) 4094 ) 4095 if not func_or_ident or not self._match(TokenType.COMMA): 4096 return None 4097 expressions = self._parse_csv( 4098 lambda: self._parse_types( 4099 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4100 ) 4101 ) 4102 expressions.insert(0, func_or_ident) 4103 else: 4104 expressions = self._parse_csv(self._parse_type_size) 4105 4106 if not expressions or not self._match(TokenType.R_PAREN): 4107 self._retreat(index) 4108 return None 4109 4110 maybe_func = True 4111 4112 this: t.Optional[exp.Expression] = None 4113 values: t.Optional[t.List[exp.Expression]] = None 4114 4115 if nested and self._match(TokenType.LT): 4116 if is_struct: 4117 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4118 else: 4119 expressions = self._parse_csv( 4120 lambda: self._parse_types( 4121 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4122 ) 4123 ) 4124 4125 if not self._match(TokenType.GT): 4126 self.raise_error("Expecting >") 4127 4128 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4129 values = self._parse_csv(self._parse_conjunction) 4130 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4131 4132 if type_token in self.TIMESTAMPS: 4133 if self._match_text_seq("WITH", "TIME", "ZONE"): 4134 maybe_func = False 4135 tz_type = ( 4136 exp.DataType.Type.TIMETZ 4137 if type_token in self.TIMES 4138 else exp.DataType.Type.TIMESTAMPTZ 4139 ) 4140 this = exp.DataType(this=tz_type, expressions=expressions) 4141 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4142 maybe_func = False 4143 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4144 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4145 maybe_func = False 4146 elif type_token == TokenType.INTERVAL: 4147 unit = self._parse_var(upper=True) 4148 if unit: 4149 if self._match_text_seq("TO"): 4150 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4151 4152 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4153 else: 4154 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4155 4156 if maybe_func and check_func: 4157 index2 = self._index 4158 peek = self._parse_string() 4159 4160 if not peek: 4161 self._retreat(index) 4162 return None 4163 4164 self._retreat(index2) 4165 4166 if not this: 4167 if self._match_text_seq("UNSIGNED"): 4168 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4169 if not unsigned_type_token: 4170 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4171 4172 type_token = unsigned_type_token or type_token 4173 4174 this = exp.DataType( 4175 this=exp.DataType.Type[type_token.value], 4176 expressions=expressions, 4177 nested=nested, 4178 values=values, 4179 prefix=prefix, 4180 ) 4181 4182 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4183 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4184 4185 return this 4186 4187 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4188 index = self._index 4189 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4190 self._match(TokenType.COLON) 4191 column_def = self._parse_column_def(this) 4192 4193 if type_required and ( 4194 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4195 ): 4196 self._retreat(index) 4197 return self._parse_types() 4198 4199 return column_def 4200 4201 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4202 if not self._match_text_seq("AT", "TIME", "ZONE"): 4203 return this 4204 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4205 4206 def _parse_column(self) -> t.Optional[exp.Expression]: 4207 this = self._parse_column_reference() 4208 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4209 4210 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4211 this = self._parse_field() 4212 if ( 4213 not this 4214 and self._match(TokenType.VALUES, advance=False) 4215 and self.VALUES_FOLLOWED_BY_PAREN 4216 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4217 ): 4218 this = self._parse_id_var() 4219 4220 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4221 4222 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4223 this = self._parse_bracket(this) 4224 4225 while self._match_set(self.COLUMN_OPERATORS): 4226 op_token = self._prev.token_type 4227 op = self.COLUMN_OPERATORS.get(op_token) 4228 4229 if op_token == TokenType.DCOLON: 4230 field = self._parse_types() 4231 if not field: 4232 self.raise_error("Expected type") 4233 elif op and self._curr: 4234 field = self._parse_column_reference() 4235 else: 4236 field = self._parse_field(any_token=True, anonymous_func=True) 4237 4238 if isinstance(field, exp.Func) and this: 4239 # bigquery allows function calls like x.y.count(...) 4240 # SAFE.SUBSTR(...) 4241 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4242 this = exp.replace_tree( 4243 this, 4244 lambda n: ( 4245 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4246 if n.table 4247 else n.this 4248 ) 4249 if isinstance(n, exp.Column) 4250 else n, 4251 ) 4252 4253 if op: 4254 this = op(self, this, field) 4255 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4256 this = self.expression( 4257 exp.Column, 4258 this=field, 4259 table=this.this, 4260 db=this.args.get("table"), 4261 catalog=this.args.get("db"), 4262 ) 4263 else: 4264 this = self.expression(exp.Dot, this=this, expression=field) 4265 this = self._parse_bracket(this) 4266 return this 4267 4268 def _parse_primary(self) -> t.Optional[exp.Expression]: 4269 if self._match_set(self.PRIMARY_PARSERS): 4270 token_type = self._prev.token_type 4271 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4272 4273 if token_type == TokenType.STRING: 4274 expressions = [primary] 4275 while self._match(TokenType.STRING): 4276 expressions.append(exp.Literal.string(self._prev.text)) 4277 4278 if len(expressions) > 1: 4279 return self.expression(exp.Concat, expressions=expressions) 4280 4281 return primary 4282 4283 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4284 return exp.Literal.number(f"0.{self._prev.text}") 4285 4286 if self._match(TokenType.L_PAREN): 4287 comments = self._prev_comments 4288 query = self._parse_select() 4289 4290 if query: 4291 expressions = [query] 4292 else: 4293 expressions = self._parse_expressions() 4294 4295 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4296 4297 if isinstance(this, exp.UNWRAPPED_QUERIES): 4298 this = self._parse_set_operations( 4299 self._parse_subquery(this=this, parse_alias=False) 4300 ) 4301 elif isinstance(this, exp.Subquery): 4302 this = self._parse_subquery( 4303 this=self._parse_set_operations(this), parse_alias=False 4304 ) 4305 elif len(expressions) > 1: 4306 this = self.expression(exp.Tuple, expressions=expressions) 4307 else: 4308 this = self.expression(exp.Paren, this=this) 4309 4310 if this: 4311 this.add_comments(comments) 4312 4313 self._match_r_paren(expression=this) 4314 return this 4315 4316 return None 4317 4318 def _parse_field( 4319 self, 4320 any_token: bool = False, 4321 tokens: t.Optional[t.Collection[TokenType]] = None, 4322 anonymous_func: bool = False, 4323 ) -> t.Optional[exp.Expression]: 4324 if anonymous_func: 4325 field = ( 4326 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4327 or self._parse_primary() 4328 ) 4329 else: 4330 field = self._parse_primary() or self._parse_function( 4331 anonymous=anonymous_func, any_token=any_token 4332 ) 4333 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4334 4335 def _parse_function( 4336 self, 4337 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4338 anonymous: bool = False, 4339 optional_parens: bool = True, 4340 any_token: bool = False, 4341 ) -> t.Optional[exp.Expression]: 4342 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4343 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4344 fn_syntax = False 4345 if ( 4346 self._match(TokenType.L_BRACE, advance=False) 4347 and self._next 4348 and self._next.text.upper() == "FN" 4349 ): 4350 self._advance(2) 4351 fn_syntax = True 4352 4353 func = self._parse_function_call( 4354 functions=functions, 4355 anonymous=anonymous, 4356 optional_parens=optional_parens, 4357 any_token=any_token, 4358 ) 4359 4360 if fn_syntax: 4361 self._match(TokenType.R_BRACE) 4362 4363 return func 4364 4365 def _parse_function_call( 4366 self, 4367 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4368 anonymous: bool = False, 4369 optional_parens: bool = True, 4370 any_token: bool = False, 4371 ) -> t.Optional[exp.Expression]: 4372 if not self._curr: 4373 return None 4374 4375 comments = self._curr.comments 4376 token_type = self._curr.token_type 4377 this = self._curr.text 4378 upper = this.upper() 4379 4380 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4381 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4382 self._advance() 4383 return self._parse_window(parser(self)) 4384 4385 if not self._next or self._next.token_type != TokenType.L_PAREN: 4386 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4387 self._advance() 4388 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4389 4390 return None 4391 4392 if any_token: 4393 if token_type in self.RESERVED_TOKENS: 4394 return None 4395 elif token_type not in self.FUNC_TOKENS: 4396 return None 4397 4398 self._advance(2) 4399 4400 parser = self.FUNCTION_PARSERS.get(upper) 4401 if parser and not anonymous: 4402 this = parser(self) 4403 else: 4404 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4405 4406 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4407 this = self.expression(subquery_predicate, this=self._parse_select()) 4408 self._match_r_paren() 4409 return this 4410 4411 if functions is None: 4412 functions = self.FUNCTIONS 4413 4414 function = functions.get(upper) 4415 4416 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4417 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4418 4419 if alias: 4420 args = self._kv_to_prop_eq(args) 4421 4422 if function and not anonymous: 4423 if "dialect" in function.__code__.co_varnames: 4424 func = function(args, dialect=self.dialect) 4425 else: 4426 func = function(args) 4427 4428 func = self.validate_expression(func, args) 4429 if not self.dialect.NORMALIZE_FUNCTIONS: 4430 func.meta["name"] = this 4431 4432 this = func 4433 else: 4434 if token_type == TokenType.IDENTIFIER: 4435 this = exp.Identifier(this=this, quoted=True) 4436 this = self.expression(exp.Anonymous, this=this, expressions=args) 4437 4438 if isinstance(this, exp.Expression): 4439 this.add_comments(comments) 4440 4441 self._match_r_paren(this) 4442 return self._parse_window(this) 4443 4444 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4445 transformed = [] 4446 4447 for e in expressions: 4448 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4449 if isinstance(e, exp.Alias): 4450 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4451 4452 if not isinstance(e, exp.PropertyEQ): 4453 e = self.expression( 4454 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4455 ) 4456 4457 if isinstance(e.this, exp.Column): 4458 e.this.replace(e.this.this) 4459 4460 transformed.append(e) 4461 4462 return transformed 4463 4464 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4465 return self._parse_column_def(self._parse_id_var()) 4466 4467 def _parse_user_defined_function( 4468 self, kind: t.Optional[TokenType] = None 4469 ) -> t.Optional[exp.Expression]: 4470 this = self._parse_id_var() 4471 4472 while self._match(TokenType.DOT): 4473 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4474 4475 if not self._match(TokenType.L_PAREN): 4476 return this 4477 4478 expressions = self._parse_csv(self._parse_function_parameter) 4479 self._match_r_paren() 4480 return self.expression( 4481 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4482 ) 4483 4484 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4485 literal = self._parse_primary() 4486 if literal: 4487 return self.expression(exp.Introducer, this=token.text, expression=literal) 4488 4489 return self.expression(exp.Identifier, this=token.text) 4490 4491 def _parse_session_parameter(self) -> exp.SessionParameter: 4492 kind = None 4493 this = self._parse_id_var() or self._parse_primary() 4494 4495 if this and self._match(TokenType.DOT): 4496 kind = this.name 4497 this = self._parse_var() or self._parse_primary() 4498 4499 return self.expression(exp.SessionParameter, this=this, kind=kind) 4500 4501 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4502 index = self._index 4503 4504 if self._match(TokenType.L_PAREN): 4505 expressions = t.cast( 4506 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4507 ) 4508 4509 if not self._match(TokenType.R_PAREN): 4510 self._retreat(index) 4511 else: 4512 expressions = [self._parse_id_var()] 4513 4514 if self._match_set(self.LAMBDAS): 4515 return self.LAMBDAS[self._prev.token_type](self, expressions) 4516 4517 self._retreat(index) 4518 4519 this: t.Optional[exp.Expression] 4520 4521 if self._match(TokenType.DISTINCT): 4522 this = self.expression( 4523 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4524 ) 4525 else: 4526 this = self._parse_select_or_expression(alias=alias) 4527 4528 return self._parse_limit( 4529 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4530 ) 4531 4532 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4533 index = self._index 4534 if not self._match(TokenType.L_PAREN): 4535 return this 4536 4537 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4538 # expr can be of both types 4539 if self._match_set(self.SELECT_START_TOKENS): 4540 self._retreat(index) 4541 return this 4542 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4543 self._match_r_paren() 4544 return self.expression(exp.Schema, this=this, expressions=args) 4545 4546 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4547 return self._parse_column_def(self._parse_field(any_token=True)) 4548 4549 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4550 # column defs are not really columns, they're identifiers 4551 if isinstance(this, exp.Column): 4552 this = this.this 4553 4554 kind = self._parse_types(schema=True) 4555 4556 if self._match_text_seq("FOR", "ORDINALITY"): 4557 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4558 4559 constraints: t.List[exp.Expression] = [] 4560 4561 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4562 ("ALIAS", "MATERIALIZED") 4563 ): 4564 persisted = self._prev.text.upper() == "MATERIALIZED" 4565 constraints.append( 4566 self.expression( 4567 exp.ComputedColumnConstraint, 4568 this=self._parse_conjunction(), 4569 persisted=persisted or self._match_text_seq("PERSISTED"), 4570 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4571 ) 4572 ) 4573 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4574 self._match(TokenType.ALIAS) 4575 constraints.append( 4576 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4577 ) 4578 4579 while True: 4580 constraint = self._parse_column_constraint() 4581 if not constraint: 4582 break 4583 constraints.append(constraint) 4584 4585 if not kind and not constraints: 4586 return this 4587 4588 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4589 4590 def _parse_auto_increment( 4591 self, 4592 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4593 start = None 4594 increment = None 4595 4596 if self._match(TokenType.L_PAREN, advance=False): 4597 args = self._parse_wrapped_csv(self._parse_bitwise) 4598 start = seq_get(args, 0) 4599 increment = seq_get(args, 1) 4600 elif self._match_text_seq("START"): 4601 start = self._parse_bitwise() 4602 self._match_text_seq("INCREMENT") 4603 increment = self._parse_bitwise() 4604 4605 if start and increment: 4606 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4607 4608 return exp.AutoIncrementColumnConstraint() 4609 4610 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4611 if not self._match_text_seq("REFRESH"): 4612 self._retreat(self._index - 1) 4613 return None 4614 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4615 4616 def _parse_compress(self) -> exp.CompressColumnConstraint: 4617 if self._match(TokenType.L_PAREN, advance=False): 4618 return self.expression( 4619 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4620 ) 4621 4622 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4623 4624 def _parse_generated_as_identity( 4625 self, 4626 ) -> ( 4627 exp.GeneratedAsIdentityColumnConstraint 4628 | exp.ComputedColumnConstraint 4629 | exp.GeneratedAsRowColumnConstraint 4630 ): 4631 if self._match_text_seq("BY", "DEFAULT"): 4632 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4633 this = self.expression( 4634 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4635 ) 4636 else: 4637 self._match_text_seq("ALWAYS") 4638 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4639 4640 self._match(TokenType.ALIAS) 4641 4642 if self._match_text_seq("ROW"): 4643 start = self._match_text_seq("START") 4644 if not start: 4645 self._match(TokenType.END) 4646 hidden = self._match_text_seq("HIDDEN") 4647 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4648 4649 identity = self._match_text_seq("IDENTITY") 4650 4651 if self._match(TokenType.L_PAREN): 4652 if self._match(TokenType.START_WITH): 4653 this.set("start", self._parse_bitwise()) 4654 if self._match_text_seq("INCREMENT", "BY"): 4655 this.set("increment", self._parse_bitwise()) 4656 if self._match_text_seq("MINVALUE"): 4657 this.set("minvalue", self._parse_bitwise()) 4658 if self._match_text_seq("MAXVALUE"): 4659 this.set("maxvalue", self._parse_bitwise()) 4660 4661 if self._match_text_seq("CYCLE"): 4662 this.set("cycle", True) 4663 elif self._match_text_seq("NO", "CYCLE"): 4664 this.set("cycle", False) 4665 4666 if not identity: 4667 this.set("expression", self._parse_bitwise()) 4668 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4669 args = self._parse_csv(self._parse_bitwise) 4670 this.set("start", seq_get(args, 0)) 4671 this.set("increment", seq_get(args, 1)) 4672 4673 self._match_r_paren() 4674 4675 return this 4676 4677 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4678 self._match_text_seq("LENGTH") 4679 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4680 4681 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4682 if self._match_text_seq("NULL"): 4683 return self.expression(exp.NotNullColumnConstraint) 4684 if self._match_text_seq("CASESPECIFIC"): 4685 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4686 if self._match_text_seq("FOR", "REPLICATION"): 4687 return self.expression(exp.NotForReplicationColumnConstraint) 4688 return None 4689 4690 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4691 if self._match(TokenType.CONSTRAINT): 4692 this = self._parse_id_var() 4693 else: 4694 this = None 4695 4696 if self._match_texts(self.CONSTRAINT_PARSERS): 4697 return self.expression( 4698 exp.ColumnConstraint, 4699 this=this, 4700 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4701 ) 4702 4703 return this 4704 4705 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4706 if not self._match(TokenType.CONSTRAINT): 4707 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4708 4709 return self.expression( 4710 exp.Constraint, 4711 this=self._parse_id_var(), 4712 expressions=self._parse_unnamed_constraints(), 4713 ) 4714 4715 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4716 constraints = [] 4717 while True: 4718 constraint = self._parse_unnamed_constraint() or self._parse_function() 4719 if not constraint: 4720 break 4721 constraints.append(constraint) 4722 4723 return constraints 4724 4725 def _parse_unnamed_constraint( 4726 self, constraints: t.Optional[t.Collection[str]] = None 4727 ) -> t.Optional[exp.Expression]: 4728 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4729 constraints or self.CONSTRAINT_PARSERS 4730 ): 4731 return None 4732 4733 constraint = self._prev.text.upper() 4734 if constraint not in self.CONSTRAINT_PARSERS: 4735 self.raise_error(f"No parser found for schema constraint {constraint}.") 4736 4737 return self.CONSTRAINT_PARSERS[constraint](self) 4738 4739 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4740 self._match_text_seq("KEY") 4741 return self.expression( 4742 exp.UniqueColumnConstraint, 4743 this=self._parse_schema(self._parse_id_var(any_token=False)), 4744 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4745 on_conflict=self._parse_on_conflict(), 4746 ) 4747 4748 def _parse_key_constraint_options(self) -> t.List[str]: 4749 options = [] 4750 while True: 4751 if not self._curr: 4752 break 4753 4754 if self._match(TokenType.ON): 4755 action = None 4756 on = self._advance_any() and self._prev.text 4757 4758 if self._match_text_seq("NO", "ACTION"): 4759 action = "NO ACTION" 4760 elif self._match_text_seq("CASCADE"): 4761 action = "CASCADE" 4762 elif self._match_text_seq("RESTRICT"): 4763 action = "RESTRICT" 4764 elif self._match_pair(TokenType.SET, TokenType.NULL): 4765 action = "SET NULL" 4766 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4767 action = "SET DEFAULT" 4768 else: 4769 self.raise_error("Invalid key constraint") 4770 4771 options.append(f"ON {on} {action}") 4772 elif self._match_text_seq("NOT", "ENFORCED"): 4773 options.append("NOT ENFORCED") 4774 elif self._match_text_seq("DEFERRABLE"): 4775 options.append("DEFERRABLE") 4776 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4777 options.append("INITIALLY DEFERRED") 4778 elif self._match_text_seq("NORELY"): 4779 options.append("NORELY") 4780 elif self._match_text_seq("MATCH", "FULL"): 4781 options.append("MATCH FULL") 4782 else: 4783 break 4784 4785 return options 4786 4787 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4788 if match and not self._match(TokenType.REFERENCES): 4789 return None 4790 4791 expressions = None 4792 this = self._parse_table(schema=True) 4793 options = self._parse_key_constraint_options() 4794 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4795 4796 def _parse_foreign_key(self) -> exp.ForeignKey: 4797 expressions = self._parse_wrapped_id_vars() 4798 reference = self._parse_references() 4799 options = {} 4800 4801 while self._match(TokenType.ON): 4802 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4803 self.raise_error("Expected DELETE or UPDATE") 4804 4805 kind = self._prev.text.lower() 4806 4807 if self._match_text_seq("NO", "ACTION"): 4808 action = "NO ACTION" 4809 elif self._match(TokenType.SET): 4810 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4811 action = "SET " + self._prev.text.upper() 4812 else: 4813 self._advance() 4814 action = self._prev.text.upper() 4815 4816 options[kind] = action 4817 4818 return self.expression( 4819 exp.ForeignKey, 4820 expressions=expressions, 4821 reference=reference, 4822 **options, # type: ignore 4823 ) 4824 4825 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4826 return self._parse_field() 4827 4828 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4829 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4830 self._retreat(self._index - 1) 4831 return None 4832 4833 id_vars = self._parse_wrapped_id_vars() 4834 return self.expression( 4835 exp.PeriodForSystemTimeConstraint, 4836 this=seq_get(id_vars, 0), 4837 expression=seq_get(id_vars, 1), 4838 ) 4839 4840 def _parse_primary_key( 4841 self, wrapped_optional: bool = False, in_props: bool = False 4842 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4843 desc = ( 4844 self._match_set((TokenType.ASC, TokenType.DESC)) 4845 and self._prev.token_type == TokenType.DESC 4846 ) 4847 4848 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4849 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4850 4851 expressions = self._parse_wrapped_csv( 4852 self._parse_primary_key_part, optional=wrapped_optional 4853 ) 4854 options = self._parse_key_constraint_options() 4855 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4856 4857 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4858 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4859 4860 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4861 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4862 return this 4863 4864 bracket_kind = self._prev.token_type 4865 expressions = self._parse_csv( 4866 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4867 ) 4868 4869 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4870 self.raise_error("Expected ]") 4871 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4872 self.raise_error("Expected }") 4873 4874 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4875 if bracket_kind == TokenType.L_BRACE: 4876 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4877 elif not this or this.name.upper() == "ARRAY": 4878 this = self.expression(exp.Array, expressions=expressions) 4879 else: 4880 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4881 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4882 4883 self._add_comments(this) 4884 return self._parse_bracket(this) 4885 4886 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4887 if self._match(TokenType.COLON): 4888 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4889 return this 4890 4891 def _parse_case(self) -> t.Optional[exp.Expression]: 4892 ifs = [] 4893 default = None 4894 4895 comments = self._prev_comments 4896 expression = self._parse_conjunction() 4897 4898 while self._match(TokenType.WHEN): 4899 this = self._parse_conjunction() 4900 self._match(TokenType.THEN) 4901 then = self._parse_conjunction() 4902 ifs.append(self.expression(exp.If, this=this, true=then)) 4903 4904 if self._match(TokenType.ELSE): 4905 default = self._parse_conjunction() 4906 4907 if not self._match(TokenType.END): 4908 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4909 default = exp.column("interval") 4910 else: 4911 self.raise_error("Expected END after CASE", self._prev) 4912 4913 return self.expression( 4914 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4915 ) 4916 4917 def _parse_if(self) -> t.Optional[exp.Expression]: 4918 if self._match(TokenType.L_PAREN): 4919 args = self._parse_csv(self._parse_conjunction) 4920 this = self.validate_expression(exp.If.from_arg_list(args), args) 4921 self._match_r_paren() 4922 else: 4923 index = self._index - 1 4924 4925 if self.NO_PAREN_IF_COMMANDS and index == 0: 4926 return self._parse_as_command(self._prev) 4927 4928 condition = self._parse_conjunction() 4929 4930 if not condition: 4931 self._retreat(index) 4932 return None 4933 4934 self._match(TokenType.THEN) 4935 true = self._parse_conjunction() 4936 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4937 self._match(TokenType.END) 4938 this = self.expression(exp.If, this=condition, true=true, false=false) 4939 4940 return this 4941 4942 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4943 if not self._match_text_seq("VALUE", "FOR"): 4944 self._retreat(self._index - 1) 4945 return None 4946 4947 return self.expression( 4948 exp.NextValueFor, 4949 this=self._parse_column(), 4950 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4951 ) 4952 4953 def _parse_extract(self) -> exp.Extract: 4954 this = self._parse_function() or self._parse_var() or self._parse_type() 4955 4956 if self._match(TokenType.FROM): 4957 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4958 4959 if not self._match(TokenType.COMMA): 4960 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4961 4962 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4963 4964 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4965 this = self._parse_conjunction() 4966 4967 if not self._match(TokenType.ALIAS): 4968 if self._match(TokenType.COMMA): 4969 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4970 4971 self.raise_error("Expected AS after CAST") 4972 4973 fmt = None 4974 to = self._parse_types() 4975 4976 if self._match(TokenType.FORMAT): 4977 fmt_string = self._parse_string() 4978 fmt = self._parse_at_time_zone(fmt_string) 4979 4980 if not to: 4981 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4982 if to.this in exp.DataType.TEMPORAL_TYPES: 4983 this = self.expression( 4984 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4985 this=this, 4986 format=exp.Literal.string( 4987 format_time( 4988 fmt_string.this if fmt_string else "", 4989 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4990 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4991 ) 4992 ), 4993 ) 4994 4995 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4996 this.set("zone", fmt.args["zone"]) 4997 return this 4998 elif not to: 4999 self.raise_error("Expected TYPE after CAST") 5000 elif isinstance(to, exp.Identifier): 5001 to = exp.DataType.build(to.name, udt=True) 5002 elif to.this == exp.DataType.Type.CHAR: 5003 if self._match(TokenType.CHARACTER_SET): 5004 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5005 5006 return self.expression( 5007 exp.Cast if strict else exp.TryCast, 5008 this=this, 5009 to=to, 5010 format=fmt, 5011 safe=safe, 5012 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5013 ) 5014 5015 def _parse_string_agg(self) -> exp.Expression: 5016 if self._match(TokenType.DISTINCT): 5017 args: t.List[t.Optional[exp.Expression]] = [ 5018 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5019 ] 5020 if self._match(TokenType.COMMA): 5021 args.extend(self._parse_csv(self._parse_conjunction)) 5022 else: 5023 args = self._parse_csv(self._parse_conjunction) # type: ignore 5024 5025 index = self._index 5026 if not self._match(TokenType.R_PAREN) and args: 5027 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5028 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5029 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5030 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5031 5032 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5033 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5034 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5035 if not self._match_text_seq("WITHIN", "GROUP"): 5036 self._retreat(index) 5037 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5038 5039 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5040 order = self._parse_order(this=seq_get(args, 0)) 5041 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5042 5043 def _parse_convert( 5044 self, strict: bool, safe: t.Optional[bool] = None 5045 ) -> t.Optional[exp.Expression]: 5046 this = self._parse_bitwise() 5047 5048 if self._match(TokenType.USING): 5049 to: t.Optional[exp.Expression] = self.expression( 5050 exp.CharacterSet, this=self._parse_var() 5051 ) 5052 elif self._match(TokenType.COMMA): 5053 to = self._parse_types() 5054 else: 5055 to = None 5056 5057 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5058 5059 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5060 """ 5061 There are generally two variants of the DECODE function: 5062 5063 - DECODE(bin, charset) 5064 - DECODE(expression, search, result [, search, result] ... [, default]) 5065 5066 The second variant will always be parsed into a CASE expression. Note that NULL 5067 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5068 instead of relying on pattern matching. 5069 """ 5070 args = self._parse_csv(self._parse_conjunction) 5071 5072 if len(args) < 3: 5073 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5074 5075 expression, *expressions = args 5076 if not expression: 5077 return None 5078 5079 ifs = [] 5080 for search, result in zip(expressions[::2], expressions[1::2]): 5081 if not search or not result: 5082 return None 5083 5084 if isinstance(search, exp.Literal): 5085 ifs.append( 5086 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5087 ) 5088 elif isinstance(search, exp.Null): 5089 ifs.append( 5090 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5091 ) 5092 else: 5093 cond = exp.or_( 5094 exp.EQ(this=expression.copy(), expression=search), 5095 exp.and_( 5096 exp.Is(this=expression.copy(), expression=exp.Null()), 5097 exp.Is(this=search.copy(), expression=exp.Null()), 5098 copy=False, 5099 ), 5100 copy=False, 5101 ) 5102 ifs.append(exp.If(this=cond, true=result)) 5103 5104 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5105 5106 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5107 self._match_text_seq("KEY") 5108 key = self._parse_column() 5109 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5110 self._match_text_seq("VALUE") 5111 value = self._parse_bitwise() 5112 5113 if not key and not value: 5114 return None 5115 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5116 5117 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5118 if not this or not self._match_text_seq("FORMAT", "JSON"): 5119 return this 5120 5121 return self.expression(exp.FormatJson, this=this) 5122 5123 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5124 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5125 for value in values: 5126 if self._match_text_seq(value, "ON", on): 5127 return f"{value} ON {on}" 5128 5129 return None 5130 5131 @t.overload 5132 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5133 5134 @t.overload 5135 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5136 5137 def _parse_json_object(self, agg=False): 5138 star = self._parse_star() 5139 expressions = ( 5140 [star] 5141 if star 5142 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5143 ) 5144 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5145 5146 unique_keys = None 5147 if self._match_text_seq("WITH", "UNIQUE"): 5148 unique_keys = True 5149 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5150 unique_keys = False 5151 5152 self._match_text_seq("KEYS") 5153 5154 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5155 self._parse_type() 5156 ) 5157 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5158 5159 return self.expression( 5160 exp.JSONObjectAgg if agg else exp.JSONObject, 5161 expressions=expressions, 5162 null_handling=null_handling, 5163 unique_keys=unique_keys, 5164 return_type=return_type, 5165 encoding=encoding, 5166 ) 5167 5168 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5169 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5170 if not self._match_text_seq("NESTED"): 5171 this = self._parse_id_var() 5172 kind = self._parse_types(allow_identifiers=False) 5173 nested = None 5174 else: 5175 this = None 5176 kind = None 5177 nested = True 5178 5179 path = self._match_text_seq("PATH") and self._parse_string() 5180 nested_schema = nested and self._parse_json_schema() 5181 5182 return self.expression( 5183 exp.JSONColumnDef, 5184 this=this, 5185 kind=kind, 5186 path=path, 5187 nested_schema=nested_schema, 5188 ) 5189 5190 def _parse_json_schema(self) -> exp.JSONSchema: 5191 self._match_text_seq("COLUMNS") 5192 return self.expression( 5193 exp.JSONSchema, 5194 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5195 ) 5196 5197 def _parse_json_table(self) -> exp.JSONTable: 5198 this = self._parse_format_json(self._parse_bitwise()) 5199 path = self._match(TokenType.COMMA) and self._parse_string() 5200 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5201 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5202 schema = self._parse_json_schema() 5203 5204 return exp.JSONTable( 5205 this=this, 5206 schema=schema, 5207 path=path, 5208 error_handling=error_handling, 5209 empty_handling=empty_handling, 5210 ) 5211 5212 def _parse_match_against(self) -> exp.MatchAgainst: 5213 expressions = self._parse_csv(self._parse_column) 5214 5215 self._match_text_seq(")", "AGAINST", "(") 5216 5217 this = self._parse_string() 5218 5219 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5220 modifier = "IN NATURAL LANGUAGE MODE" 5221 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5222 modifier = f"{modifier} WITH QUERY EXPANSION" 5223 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5224 modifier = "IN BOOLEAN MODE" 5225 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5226 modifier = "WITH QUERY EXPANSION" 5227 else: 5228 modifier = None 5229 5230 return self.expression( 5231 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5232 ) 5233 5234 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5235 def _parse_open_json(self) -> exp.OpenJSON: 5236 this = self._parse_bitwise() 5237 path = self._match(TokenType.COMMA) and self._parse_string() 5238 5239 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5240 this = self._parse_field(any_token=True) 5241 kind = self._parse_types() 5242 path = self._parse_string() 5243 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5244 5245 return self.expression( 5246 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5247 ) 5248 5249 expressions = None 5250 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5251 self._match_l_paren() 5252 expressions = self._parse_csv(_parse_open_json_column_def) 5253 5254 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5255 5256 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5257 args = self._parse_csv(self._parse_bitwise) 5258 5259 if self._match(TokenType.IN): 5260 return self.expression( 5261 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5262 ) 5263 5264 if haystack_first: 5265 haystack = seq_get(args, 0) 5266 needle = seq_get(args, 1) 5267 else: 5268 needle = seq_get(args, 0) 5269 haystack = seq_get(args, 1) 5270 5271 return self.expression( 5272 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5273 ) 5274 5275 def _parse_predict(self) -> exp.Predict: 5276 self._match_text_seq("MODEL") 5277 this = self._parse_table() 5278 5279 self._match(TokenType.COMMA) 5280 self._match_text_seq("TABLE") 5281 5282 return self.expression( 5283 exp.Predict, 5284 this=this, 5285 expression=self._parse_table(), 5286 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5287 ) 5288 5289 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5290 args = self._parse_csv(self._parse_table) 5291 return exp.JoinHint(this=func_name.upper(), expressions=args) 5292 5293 def _parse_substring(self) -> exp.Substring: 5294 # Postgres supports the form: substring(string [from int] [for int]) 5295 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5296 5297 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5298 5299 if self._match(TokenType.FROM): 5300 args.append(self._parse_bitwise()) 5301 if self._match(TokenType.FOR): 5302 args.append(self._parse_bitwise()) 5303 5304 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5305 5306 def _parse_trim(self) -> exp.Trim: 5307 # https://www.w3resource.com/sql/character-functions/trim.php 5308 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5309 5310 position = None 5311 collation = None 5312 expression = None 5313 5314 if self._match_texts(self.TRIM_TYPES): 5315 position = self._prev.text.upper() 5316 5317 this = self._parse_bitwise() 5318 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5319 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5320 expression = self._parse_bitwise() 5321 5322 if invert_order: 5323 this, expression = expression, this 5324 5325 if self._match(TokenType.COLLATE): 5326 collation = self._parse_bitwise() 5327 5328 return self.expression( 5329 exp.Trim, this=this, position=position, expression=expression, collation=collation 5330 ) 5331 5332 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5333 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5334 5335 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5336 return self._parse_window(self._parse_id_var(), alias=True) 5337 5338 def _parse_respect_or_ignore_nulls( 5339 self, this: t.Optional[exp.Expression] 5340 ) -> t.Optional[exp.Expression]: 5341 if self._match_text_seq("IGNORE", "NULLS"): 5342 return self.expression(exp.IgnoreNulls, this=this) 5343 if self._match_text_seq("RESPECT", "NULLS"): 5344 return self.expression(exp.RespectNulls, this=this) 5345 return this 5346 5347 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5348 if self._match(TokenType.HAVING): 5349 self._match_texts(("MAX", "MIN")) 5350 max = self._prev.text.upper() != "MIN" 5351 return self.expression( 5352 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5353 ) 5354 5355 return this 5356 5357 def _parse_window( 5358 self, this: t.Optional[exp.Expression], alias: bool = False 5359 ) -> t.Optional[exp.Expression]: 5360 func = this 5361 comments = func.comments if isinstance(func, exp.Expression) else None 5362 5363 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5364 self._match(TokenType.WHERE) 5365 this = self.expression( 5366 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5367 ) 5368 self._match_r_paren() 5369 5370 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5371 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5372 if self._match_text_seq("WITHIN", "GROUP"): 5373 order = self._parse_wrapped(self._parse_order) 5374 this = self.expression(exp.WithinGroup, this=this, expression=order) 5375 5376 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5377 # Some dialects choose to implement and some do not. 5378 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5379 5380 # There is some code above in _parse_lambda that handles 5381 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5382 5383 # The below changes handle 5384 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5385 5386 # Oracle allows both formats 5387 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5388 # and Snowflake chose to do the same for familiarity 5389 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5390 if isinstance(this, exp.AggFunc): 5391 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5392 5393 if ignore_respect and ignore_respect is not this: 5394 ignore_respect.replace(ignore_respect.this) 5395 this = self.expression(ignore_respect.__class__, this=this) 5396 5397 this = self._parse_respect_or_ignore_nulls(this) 5398 5399 # bigquery select from window x AS (partition by ...) 5400 if alias: 5401 over = None 5402 self._match(TokenType.ALIAS) 5403 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5404 return this 5405 else: 5406 over = self._prev.text.upper() 5407 5408 if comments: 5409 func.comments = None # type: ignore 5410 5411 if not self._match(TokenType.L_PAREN): 5412 return self.expression( 5413 exp.Window, 5414 comments=comments, 5415 this=this, 5416 alias=self._parse_id_var(False), 5417 over=over, 5418 ) 5419 5420 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5421 5422 first = self._match(TokenType.FIRST) 5423 if self._match_text_seq("LAST"): 5424 first = False 5425 5426 partition, order = self._parse_partition_and_order() 5427 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5428 5429 if kind: 5430 self._match(TokenType.BETWEEN) 5431 start = self._parse_window_spec() 5432 self._match(TokenType.AND) 5433 end = self._parse_window_spec() 5434 5435 spec = self.expression( 5436 exp.WindowSpec, 5437 kind=kind, 5438 start=start["value"], 5439 start_side=start["side"], 5440 end=end["value"], 5441 end_side=end["side"], 5442 ) 5443 else: 5444 spec = None 5445 5446 self._match_r_paren() 5447 5448 window = self.expression( 5449 exp.Window, 5450 comments=comments, 5451 this=this, 5452 partition_by=partition, 5453 order=order, 5454 spec=spec, 5455 alias=window_alias, 5456 over=over, 5457 first=first, 5458 ) 5459 5460 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5461 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5462 return self._parse_window(window, alias=alias) 5463 5464 return window 5465 5466 def _parse_partition_and_order( 5467 self, 5468 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5469 return self._parse_partition_by(), self._parse_order() 5470 5471 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5472 self._match(TokenType.BETWEEN) 5473 5474 return { 5475 "value": ( 5476 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5477 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5478 or self._parse_bitwise() 5479 ), 5480 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5481 } 5482 5483 def _parse_alias( 5484 self, this: t.Optional[exp.Expression], explicit: bool = False 5485 ) -> t.Optional[exp.Expression]: 5486 any_token = self._match(TokenType.ALIAS) 5487 comments = self._prev_comments 5488 5489 if explicit and not any_token: 5490 return this 5491 5492 if self._match(TokenType.L_PAREN): 5493 aliases = self.expression( 5494 exp.Aliases, 5495 comments=comments, 5496 this=this, 5497 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5498 ) 5499 self._match_r_paren(aliases) 5500 return aliases 5501 5502 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5503 self.STRING_ALIASES and self._parse_string_as_identifier() 5504 ) 5505 5506 if alias: 5507 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5508 column = this.this 5509 5510 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5511 if not this.comments and column and column.comments: 5512 this.comments = column.comments 5513 column.comments = None 5514 5515 return this 5516 5517 def _parse_id_var( 5518 self, 5519 any_token: bool = True, 5520 tokens: t.Optional[t.Collection[TokenType]] = None, 5521 ) -> t.Optional[exp.Expression]: 5522 identifier = self._parse_identifier() 5523 if identifier: 5524 return identifier 5525 5526 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5527 quoted = self._prev.token_type == TokenType.STRING 5528 return exp.Identifier(this=self._prev.text, quoted=quoted) 5529 5530 return None 5531 5532 def _parse_string(self) -> t.Optional[exp.Expression]: 5533 if self._match_set(self.STRING_PARSERS): 5534 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5535 return self._parse_placeholder() 5536 5537 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5538 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5539 5540 def _parse_number(self) -> t.Optional[exp.Expression]: 5541 if self._match_set(self.NUMERIC_PARSERS): 5542 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5543 return self._parse_placeholder() 5544 5545 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5546 if self._match(TokenType.IDENTIFIER): 5547 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5548 return self._parse_placeholder() 5549 5550 def _parse_var( 5551 self, 5552 any_token: bool = False, 5553 tokens: t.Optional[t.Collection[TokenType]] = None, 5554 upper: bool = False, 5555 ) -> t.Optional[exp.Expression]: 5556 if ( 5557 (any_token and self._advance_any()) 5558 or self._match(TokenType.VAR) 5559 or (self._match_set(tokens) if tokens else False) 5560 ): 5561 return self.expression( 5562 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5563 ) 5564 return self._parse_placeholder() 5565 5566 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5567 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5568 self._advance() 5569 return self._prev 5570 return None 5571 5572 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5573 return self._parse_var() or self._parse_string() 5574 5575 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5576 return self._parse_primary() or self._parse_var(any_token=True) 5577 5578 def _parse_null(self) -> t.Optional[exp.Expression]: 5579 if self._match_set(self.NULL_TOKENS): 5580 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5581 return self._parse_placeholder() 5582 5583 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5584 if self._match(TokenType.TRUE): 5585 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5586 if self._match(TokenType.FALSE): 5587 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5588 return self._parse_placeholder() 5589 5590 def _parse_star(self) -> t.Optional[exp.Expression]: 5591 if self._match(TokenType.STAR): 5592 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5593 return self._parse_placeholder() 5594 5595 def _parse_parameter(self) -> exp.Parameter: 5596 self._match(TokenType.L_BRACE) 5597 this = self._parse_identifier() or self._parse_primary_or_var() 5598 expression = self._match(TokenType.COLON) and ( 5599 self._parse_identifier() or self._parse_primary_or_var() 5600 ) 5601 self._match(TokenType.R_BRACE) 5602 return self.expression(exp.Parameter, this=this, expression=expression) 5603 5604 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5605 if self._match_set(self.PLACEHOLDER_PARSERS): 5606 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5607 if placeholder: 5608 return placeholder 5609 self._advance(-1) 5610 return None 5611 5612 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5613 if not self._match(TokenType.EXCEPT): 5614 return None 5615 if self._match(TokenType.L_PAREN, advance=False): 5616 return self._parse_wrapped_csv(self._parse_column) 5617 5618 except_column = self._parse_column() 5619 return [except_column] if except_column else None 5620 5621 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5622 if not self._match(TokenType.REPLACE): 5623 return None 5624 if self._match(TokenType.L_PAREN, advance=False): 5625 return self._parse_wrapped_csv(self._parse_expression) 5626 5627 replace_expression = self._parse_expression() 5628 return [replace_expression] if replace_expression else None 5629 5630 def _parse_csv( 5631 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5632 ) -> t.List[exp.Expression]: 5633 parse_result = parse_method() 5634 items = [parse_result] if parse_result is not None else [] 5635 5636 while self._match(sep): 5637 self._add_comments(parse_result) 5638 parse_result = parse_method() 5639 if parse_result is not None: 5640 items.append(parse_result) 5641 5642 return items 5643 5644 def _parse_tokens( 5645 self, parse_method: t.Callable, expressions: t.Dict 5646 ) -> t.Optional[exp.Expression]: 5647 this = parse_method() 5648 5649 while self._match_set(expressions): 5650 this = self.expression( 5651 expressions[self._prev.token_type], 5652 this=this, 5653 comments=self._prev_comments, 5654 expression=parse_method(), 5655 ) 5656 5657 return this 5658 5659 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5660 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5661 5662 def _parse_wrapped_csv( 5663 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5664 ) -> t.List[exp.Expression]: 5665 return self._parse_wrapped( 5666 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5667 ) 5668 5669 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5670 wrapped = self._match(TokenType.L_PAREN) 5671 if not wrapped and not optional: 5672 self.raise_error("Expecting (") 5673 parse_result = parse_method() 5674 if wrapped: 5675 self._match_r_paren() 5676 return parse_result 5677 5678 def _parse_expressions(self) -> t.List[exp.Expression]: 5679 return self._parse_csv(self._parse_expression) 5680 5681 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5682 return self._parse_select() or self._parse_set_operations( 5683 self._parse_expression() if alias else self._parse_conjunction() 5684 ) 5685 5686 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5687 return self._parse_query_modifiers( 5688 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5689 ) 5690 5691 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5692 this = None 5693 if self._match_texts(self.TRANSACTION_KIND): 5694 this = self._prev.text 5695 5696 self._match_texts(("TRANSACTION", "WORK")) 5697 5698 modes = [] 5699 while True: 5700 mode = [] 5701 while self._match(TokenType.VAR): 5702 mode.append(self._prev.text) 5703 5704 if mode: 5705 modes.append(" ".join(mode)) 5706 if not self._match(TokenType.COMMA): 5707 break 5708 5709 return self.expression(exp.Transaction, this=this, modes=modes) 5710 5711 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5712 chain = None 5713 savepoint = None 5714 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5715 5716 self._match_texts(("TRANSACTION", "WORK")) 5717 5718 if self._match_text_seq("TO"): 5719 self._match_text_seq("SAVEPOINT") 5720 savepoint = self._parse_id_var() 5721 5722 if self._match(TokenType.AND): 5723 chain = not self._match_text_seq("NO") 5724 self._match_text_seq("CHAIN") 5725 5726 if is_rollback: 5727 return self.expression(exp.Rollback, savepoint=savepoint) 5728 5729 return self.expression(exp.Commit, chain=chain) 5730 5731 def _parse_refresh(self) -> exp.Refresh: 5732 self._match(TokenType.TABLE) 5733 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5734 5735 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5736 if not self._match_text_seq("ADD"): 5737 return None 5738 5739 self._match(TokenType.COLUMN) 5740 exists_column = self._parse_exists(not_=True) 5741 expression = self._parse_field_def() 5742 5743 if expression: 5744 expression.set("exists", exists_column) 5745 5746 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5747 if self._match_texts(("FIRST", "AFTER")): 5748 position = self._prev.text 5749 column_position = self.expression( 5750 exp.ColumnPosition, this=self._parse_column(), position=position 5751 ) 5752 expression.set("position", column_position) 5753 5754 return expression 5755 5756 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5757 drop = self._match(TokenType.DROP) and self._parse_drop() 5758 if drop and not isinstance(drop, exp.Command): 5759 drop.set("kind", drop.args.get("kind", "COLUMN")) 5760 return drop 5761 5762 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5763 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5764 return self.expression( 5765 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5766 ) 5767 5768 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5769 index = self._index - 1 5770 5771 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5772 return self._parse_csv( 5773 lambda: self.expression( 5774 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5775 ) 5776 ) 5777 5778 self._retreat(index) 5779 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5780 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5781 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5782 5783 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5784 self._match(TokenType.COLUMN) 5785 column = self._parse_field(any_token=True) 5786 5787 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5788 return self.expression(exp.AlterColumn, this=column, drop=True) 5789 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5790 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5791 if self._match(TokenType.COMMENT): 5792 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5793 5794 self._match_text_seq("SET", "DATA") 5795 self._match_text_seq("TYPE") 5796 return self.expression( 5797 exp.AlterColumn, 5798 this=column, 5799 dtype=self._parse_types(), 5800 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5801 using=self._match(TokenType.USING) and self._parse_conjunction(), 5802 ) 5803 5804 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5805 index = self._index - 1 5806 5807 partition_exists = self._parse_exists() 5808 if self._match(TokenType.PARTITION, advance=False): 5809 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5810 5811 self._retreat(index) 5812 return self._parse_csv(self._parse_drop_column) 5813 5814 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5815 if self._match(TokenType.COLUMN): 5816 exists = self._parse_exists() 5817 old_column = self._parse_column() 5818 to = self._match_text_seq("TO") 5819 new_column = self._parse_column() 5820 5821 if old_column is None or to is None or new_column is None: 5822 return None 5823 5824 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5825 5826 self._match_text_seq("TO") 5827 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5828 5829 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5830 start = self._prev 5831 5832 if not self._match(TokenType.TABLE): 5833 return self._parse_as_command(start) 5834 5835 exists = self._parse_exists() 5836 only = self._match_text_seq("ONLY") 5837 this = self._parse_table(schema=True) 5838 5839 if self._next: 5840 self._advance() 5841 5842 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5843 if parser: 5844 actions = ensure_list(parser(self)) 5845 options = self._parse_csv(self._parse_property) 5846 5847 if not self._curr and actions: 5848 return self.expression( 5849 exp.AlterTable, 5850 this=this, 5851 exists=exists, 5852 actions=actions, 5853 only=only, 5854 options=options, 5855 ) 5856 5857 return self._parse_as_command(start) 5858 5859 def _parse_merge(self) -> exp.Merge: 5860 self._match(TokenType.INTO) 5861 target = self._parse_table() 5862 5863 if target and self._match(TokenType.ALIAS, advance=False): 5864 target.set("alias", self._parse_table_alias()) 5865 5866 self._match(TokenType.USING) 5867 using = self._parse_table() 5868 5869 self._match(TokenType.ON) 5870 on = self._parse_conjunction() 5871 5872 return self.expression( 5873 exp.Merge, 5874 this=target, 5875 using=using, 5876 on=on, 5877 expressions=self._parse_when_matched(), 5878 ) 5879 5880 def _parse_when_matched(self) -> t.List[exp.When]: 5881 whens = [] 5882 5883 while self._match(TokenType.WHEN): 5884 matched = not self._match(TokenType.NOT) 5885 self._match_text_seq("MATCHED") 5886 source = ( 5887 False 5888 if self._match_text_seq("BY", "TARGET") 5889 else self._match_text_seq("BY", "SOURCE") 5890 ) 5891 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5892 5893 self._match(TokenType.THEN) 5894 5895 if self._match(TokenType.INSERT): 5896 _this = self._parse_star() 5897 if _this: 5898 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5899 else: 5900 then = self.expression( 5901 exp.Insert, 5902 this=self._parse_value(), 5903 expression=self._match_text_seq("VALUES") and self._parse_value(), 5904 ) 5905 elif self._match(TokenType.UPDATE): 5906 expressions = self._parse_star() 5907 if expressions: 5908 then = self.expression(exp.Update, expressions=expressions) 5909 else: 5910 then = self.expression( 5911 exp.Update, 5912 expressions=self._match(TokenType.SET) 5913 and self._parse_csv(self._parse_equality), 5914 ) 5915 elif self._match(TokenType.DELETE): 5916 then = self.expression(exp.Var, this=self._prev.text) 5917 else: 5918 then = None 5919 5920 whens.append( 5921 self.expression( 5922 exp.When, 5923 matched=matched, 5924 source=source, 5925 condition=condition, 5926 then=then, 5927 ) 5928 ) 5929 return whens 5930 5931 def _parse_show(self) -> t.Optional[exp.Expression]: 5932 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5933 if parser: 5934 return parser(self) 5935 return self._parse_as_command(self._prev) 5936 5937 def _parse_set_item_assignment( 5938 self, kind: t.Optional[str] = None 5939 ) -> t.Optional[exp.Expression]: 5940 index = self._index 5941 5942 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5943 return self._parse_set_transaction(global_=kind == "GLOBAL") 5944 5945 left = self._parse_primary() or self._parse_id_var() 5946 assignment_delimiter = self._match_texts(("=", "TO")) 5947 5948 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5949 self._retreat(index) 5950 return None 5951 5952 right = self._parse_statement() or self._parse_id_var() 5953 this = self.expression(exp.EQ, this=left, expression=right) 5954 5955 return self.expression(exp.SetItem, this=this, kind=kind) 5956 5957 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5958 self._match_text_seq("TRANSACTION") 5959 characteristics = self._parse_csv( 5960 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5961 ) 5962 return self.expression( 5963 exp.SetItem, 5964 expressions=characteristics, 5965 kind="TRANSACTION", 5966 **{"global": global_}, # type: ignore 5967 ) 5968 5969 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5970 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5971 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5972 5973 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5974 index = self._index 5975 set_ = self.expression( 5976 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5977 ) 5978 5979 if self._curr: 5980 self._retreat(index) 5981 return self._parse_as_command(self._prev) 5982 5983 return set_ 5984 5985 def _parse_var_from_options( 5986 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5987 ) -> t.Optional[exp.Var]: 5988 start = self._curr 5989 if not start: 5990 return None 5991 5992 option = start.text.upper() 5993 continuations = options.get(option) 5994 5995 index = self._index 5996 self._advance() 5997 for keywords in continuations or []: 5998 if isinstance(keywords, str): 5999 keywords = (keywords,) 6000 6001 if self._match_text_seq(*keywords): 6002 option = f"{option} {' '.join(keywords)}" 6003 break 6004 else: 6005 if continuations or continuations is None: 6006 if raise_unmatched: 6007 self.raise_error(f"Unknown option {option}") 6008 6009 self._retreat(index) 6010 return None 6011 6012 return exp.var(option) 6013 6014 def _parse_as_command(self, start: Token) -> exp.Command: 6015 while self._curr: 6016 self._advance() 6017 text = self._find_sql(start, self._prev) 6018 size = len(start.text) 6019 self._warn_unsupported() 6020 return exp.Command(this=text[:size], expression=text[size:]) 6021 6022 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6023 settings = [] 6024 6025 self._match_l_paren() 6026 kind = self._parse_id_var() 6027 6028 if self._match(TokenType.L_PAREN): 6029 while True: 6030 key = self._parse_id_var() 6031 value = self._parse_primary() 6032 6033 if not key and value is None: 6034 break 6035 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6036 self._match(TokenType.R_PAREN) 6037 6038 self._match_r_paren() 6039 6040 return self.expression( 6041 exp.DictProperty, 6042 this=this, 6043 kind=kind.this if kind else None, 6044 settings=settings, 6045 ) 6046 6047 def _parse_dict_range(self, this: str) -> exp.DictRange: 6048 self._match_l_paren() 6049 has_min = self._match_text_seq("MIN") 6050 if has_min: 6051 min = self._parse_var() or self._parse_primary() 6052 self._match_text_seq("MAX") 6053 max = self._parse_var() or self._parse_primary() 6054 else: 6055 max = self._parse_var() or self._parse_primary() 6056 min = exp.Literal.number(0) 6057 self._match_r_paren() 6058 return self.expression(exp.DictRange, this=this, min=min, max=max) 6059 6060 def _parse_comprehension( 6061 self, this: t.Optional[exp.Expression] 6062 ) -> t.Optional[exp.Comprehension]: 6063 index = self._index 6064 expression = self._parse_column() 6065 if not self._match(TokenType.IN): 6066 self._retreat(index - 1) 6067 return None 6068 iterator = self._parse_column() 6069 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6070 return self.expression( 6071 exp.Comprehension, 6072 this=this, 6073 expression=expression, 6074 iterator=iterator, 6075 condition=condition, 6076 ) 6077 6078 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6079 if self._match(TokenType.HEREDOC_STRING): 6080 return self.expression(exp.Heredoc, this=self._prev.text) 6081 6082 if not self._match_text_seq("$"): 6083 return None 6084 6085 tags = ["$"] 6086 tag_text = None 6087 6088 if self._is_connected(): 6089 self._advance() 6090 tags.append(self._prev.text.upper()) 6091 else: 6092 self.raise_error("No closing $ found") 6093 6094 if tags[-1] != "$": 6095 if self._is_connected() and self._match_text_seq("$"): 6096 tag_text = tags[-1] 6097 tags.append("$") 6098 else: 6099 self.raise_error("No closing $ found") 6100 6101 heredoc_start = self._curr 6102 6103 while self._curr: 6104 if self._match_text_seq(*tags, advance=False): 6105 this = self._find_sql(heredoc_start, self._prev) 6106 self._advance(len(tags)) 6107 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6108 6109 self._advance() 6110 6111 self.raise_error(f"No closing {''.join(tags)} found") 6112 return None 6113 6114 def _find_parser( 6115 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6116 ) -> t.Optional[t.Callable]: 6117 if not self._curr: 6118 return None 6119 6120 index = self._index 6121 this = [] 6122 while True: 6123 # The current token might be multiple words 6124 curr = self._curr.text.upper() 6125 key = curr.split(" ") 6126 this.append(curr) 6127 6128 self._advance() 6129 result, trie = in_trie(trie, key) 6130 if result == TrieResult.FAILED: 6131 break 6132 6133 if result == TrieResult.EXISTS: 6134 subparser = parsers[" ".join(this)] 6135 return subparser 6136 6137 self._retreat(index) 6138 return None 6139 6140 def _match(self, token_type, advance=True, expression=None): 6141 if not self._curr: 6142 return None 6143 6144 if self._curr.token_type == token_type: 6145 if advance: 6146 self._advance() 6147 self._add_comments(expression) 6148 return True 6149 6150 return None 6151 6152 def _match_set(self, types, advance=True): 6153 if not self._curr: 6154 return None 6155 6156 if self._curr.token_type in types: 6157 if advance: 6158 self._advance() 6159 return True 6160 6161 return None 6162 6163 def _match_pair(self, token_type_a, token_type_b, advance=True): 6164 if not self._curr or not self._next: 6165 return None 6166 6167 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6168 if advance: 6169 self._advance(2) 6170 return True 6171 6172 return None 6173 6174 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6175 if not self._match(TokenType.L_PAREN, expression=expression): 6176 self.raise_error("Expecting (") 6177 6178 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6179 if not self._match(TokenType.R_PAREN, expression=expression): 6180 self.raise_error("Expecting )") 6181 6182 def _match_texts(self, texts, advance=True): 6183 if self._curr and self._curr.text.upper() in texts: 6184 if advance: 6185 self._advance() 6186 return True 6187 return None 6188 6189 def _match_text_seq(self, *texts, advance=True): 6190 index = self._index 6191 for text in texts: 6192 if self._curr and self._curr.text.upper() == text: 6193 self._advance() 6194 else: 6195 self._retreat(index) 6196 return None 6197 6198 if not advance: 6199 self._retreat(index) 6200 6201 return True 6202 6203 def _replace_lambda( 6204 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6205 ) -> t.Optional[exp.Expression]: 6206 if not node: 6207 return node 6208 6209 for column in node.find_all(exp.Column): 6210 if column.parts[0].name in lambda_variables: 6211 dot_or_id = column.to_dot() if column.table else column.this 6212 parent = column.parent 6213 6214 while isinstance(parent, exp.Dot): 6215 if not isinstance(parent.parent, exp.Dot): 6216 parent.replace(dot_or_id) 6217 break 6218 parent = parent.parent 6219 else: 6220 if column is node: 6221 node = dot_or_id 6222 else: 6223 column.replace(dot_or_id) 6224 return node 6225 6226 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6227 start = self._prev 6228 6229 # Not to be confused with TRUNCATE(number, decimals) function call 6230 if self._match(TokenType.L_PAREN): 6231 self._retreat(self._index - 2) 6232 return self._parse_function() 6233 6234 # Clickhouse supports TRUNCATE DATABASE as well 6235 is_database = self._match(TokenType.DATABASE) 6236 6237 self._match(TokenType.TABLE) 6238 6239 exists = self._parse_exists(not_=False) 6240 6241 expressions = self._parse_csv( 6242 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6243 ) 6244 6245 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6246 6247 if self._match_text_seq("RESTART", "IDENTITY"): 6248 identity = "RESTART" 6249 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6250 identity = "CONTINUE" 6251 else: 6252 identity = None 6253 6254 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6255 option = self._prev.text 6256 else: 6257 option = None 6258 6259 partition = self._parse_partition() 6260 6261 # Fallback case 6262 if self._curr: 6263 return self._parse_as_command(start) 6264 6265 return self.expression( 6266 exp.TruncateTable, 6267 expressions=expressions, 6268 is_database=is_database, 6269 exists=exists, 6270 cluster=cluster, 6271 identity=identity, 6272 option=option, 6273 partition=partition, 6274 ) 6275 6276 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6277 this = self._parse_ordered(self._parse_opclass) 6278 6279 if not self._match(TokenType.WITH): 6280 return this 6281 6282 op = self._parse_var(any_token=True) 6283 6284 return self.expression(exp.WithOperator, this=this, op=op)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1135 def __init__( 1136 self, 1137 error_level: t.Optional[ErrorLevel] = None, 1138 error_message_context: int = 100, 1139 max_errors: int = 3, 1140 dialect: DialectType = None, 1141 ): 1142 from sqlglot.dialects import Dialect 1143 1144 self.error_level = error_level or ErrorLevel.IMMEDIATE 1145 self.error_message_context = error_message_context 1146 self.max_errors = max_errors 1147 self.dialect = Dialect.get_or_raise(dialect) 1148 self.reset()
1160 def parse( 1161 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1162 ) -> t.List[t.Optional[exp.Expression]]: 1163 """ 1164 Parses a list of tokens and returns a list of syntax trees, one tree 1165 per parsed SQL statement. 1166 1167 Args: 1168 raw_tokens: The list of tokens. 1169 sql: The original SQL string, used to produce helpful debug messages. 1170 1171 Returns: 1172 The list of the produced syntax trees. 1173 """ 1174 return self._parse( 1175 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1176 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1178 def parse_into( 1179 self, 1180 expression_types: exp.IntoType, 1181 raw_tokens: t.List[Token], 1182 sql: t.Optional[str] = None, 1183 ) -> t.List[t.Optional[exp.Expression]]: 1184 """ 1185 Parses a list of tokens into a given Expression type. If a collection of Expression 1186 types is given instead, this method will try to parse the token list into each one 1187 of them, stopping at the first for which the parsing succeeds. 1188 1189 Args: 1190 expression_types: The expression type(s) to try and parse the token list into. 1191 raw_tokens: The list of tokens. 1192 sql: The original SQL string, used to produce helpful debug messages. 1193 1194 Returns: 1195 The target Expression. 1196 """ 1197 errors = [] 1198 for expression_type in ensure_list(expression_types): 1199 parser = self.EXPRESSION_PARSERS.get(expression_type) 1200 if not parser: 1201 raise TypeError(f"No parser registered for {expression_type}") 1202 1203 try: 1204 return self._parse(parser, raw_tokens, sql) 1205 except ParseError as e: 1206 e.errors[0]["into_expression"] = expression_type 1207 errors.append(e) 1208 1209 raise ParseError( 1210 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1211 errors=merge_errors(errors), 1212 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1249 def check_errors(self) -> None: 1250 """Logs or raises any found errors, depending on the chosen error level setting.""" 1251 if self.error_level == ErrorLevel.WARN: 1252 for error in self.errors: 1253 logger.error(str(error)) 1254 elif self.error_level == ErrorLevel.RAISE and self.errors: 1255 raise ParseError( 1256 concat_messages(self.errors, self.max_errors), 1257 errors=merge_errors(self.errors), 1258 )
Logs or raises any found errors, depending on the chosen error level setting.
1260 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1261 """ 1262 Appends an error in the list of recorded errors or raises it, depending on the chosen 1263 error level setting. 1264 """ 1265 token = token or self._curr or self._prev or Token.string("") 1266 start = token.start 1267 end = token.end + 1 1268 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1269 highlight = self.sql[start:end] 1270 end_context = self.sql[end : end + self.error_message_context] 1271 1272 error = ParseError.new( 1273 f"{message}. Line {token.line}, Col: {token.col}.\n" 1274 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1275 description=message, 1276 line=token.line, 1277 col=token.col, 1278 start_context=start_context, 1279 highlight=highlight, 1280 end_context=end_context, 1281 ) 1282 1283 if self.error_level == ErrorLevel.IMMEDIATE: 1284 raise error 1285 1286 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1288 def expression( 1289 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1290 ) -> E: 1291 """ 1292 Creates a new, validated Expression. 1293 1294 Args: 1295 exp_class: The expression class to instantiate. 1296 comments: An optional list of comments to attach to the expression. 1297 kwargs: The arguments to set for the expression along with their respective values. 1298 1299 Returns: 1300 The target expression. 1301 """ 1302 instance = exp_class(**kwargs) 1303 instance.add_comments(comments) if comments else self._add_comments(instance) 1304 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1311 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1312 """ 1313 Validates an Expression, making sure that all its mandatory arguments are set. 1314 1315 Args: 1316 expression: The expression to validate. 1317 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1318 1319 Returns: 1320 The validated expression. 1321 """ 1322 if self.error_level != ErrorLevel.IGNORE: 1323 for error_message in expression.error_messages(args): 1324 self.raise_error(error_message) 1325 1326 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.