sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
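
# Illustrative sketch (not part of the module): build_mod parenthesizes binary
# operands so operator precedence survives when MOD(...) is rendered as `%`.
# The expected round trip with the default dialect (output shape is assumed):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT MOD(a + 1, 7)")[0]
#   'SELECT (a + 1) % 7'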

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
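
    # Illustrative sketch (assumed behavior, not part of the module): tokens in
    # NO_PAREN_FUNCTIONS parse as function expressions even without parentheses:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SELECT CURRENT_DATE").expressions[0], exp.CurrentDate)
    #   True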

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }
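
    # Illustrative sketch (assumed shape, not part of the module): each of these
    # token -> expression maps backs one level of the precedence climb, roughly:
    #
    #   def _parse_conjunction(self):
    #       return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
    #
    # i.e. parse an operand with the tighter-binding rule, then fold matching
    # operator tokens left-associatively into the mapped expression nodes.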

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
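
    # Illustrative sketch (assumed output, not part of the module): the DCOLON
    # column operator maps `::` to a Cast (or TryCast when STRICT_CAST is False),
    # so PostgreSQL-style casts round-trip through CAST syntax:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT x::INT")[0]
    #   'SELECT CAST(x AS INT)'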

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }
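
    # Illustrative sketch (assumed behavior, not part of the module): statement
    # parsing dispatches on the leading token via STATEMENT_PARSERS, e.g.
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("DROP TABLE t")).__name__
    #   'Drop'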

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
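
    # Illustrative sketch (assumed output, not part of the module): RANGE_PARSERS
    # extends an already-parsed operand when a range token follows it, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("x BETWEEN 1 AND 10").sql()
    #   'x BETWEEN 1 AND 10'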

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
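
    # Illustrative sketch (assumed output, not part of the module): column
    # constraints are matched by keyword against CONSTRAINT_PARSERS while a
    # column definition is being parsed, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)").sql()
    #   'CREATE TABLE t (x INT NOT NULL DEFAULT 0)'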

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
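
    # Illustrative sketch (assumed output, not part of the module): entries in
    # FUNCTION_PARSERS take over when a function uses non-standard argument
    # syntax, e.g. EXTRACT's `unit FROM expr` form:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").sql()
    #   'SELECT EXTRACT(YEAR FROM d)'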

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
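
    # Illustrative sketch (assumed behavior, not part of the module): with
    # MODIFIERS_ATTACHED_TO_UNION, a trailing LIMIT belongs to the whole union
    # rather than to its right-hand SELECT:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("SELECT 1 UNION SELECT 2 LIMIT 1")
    #   >>> type(q).__name__, "limit" in q.args
    #   ('Union', True)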

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
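
    # Illustrative usage (a minimal sketch; assumes sqlglot's public Dialect API,
    # and that the default generator renders the tree back unchanged):
    #
    #   >>> from sqlglot.dialects import Dialect
    #   >>> dialect = Dialect.get_or_raise("duckdb")
    #   >>> parser = Parser(dialect=dialect)
    #   >>> [e.sql() for e in parser.parse(dialect.tokenizer.tokenize("SELECT 1"))]
    #   ['SELECT 1']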
1270 """ 1271 errors = [] 1272 for expression_type in ensure_list(expression_types): 1273 parser = self.EXPRESSION_PARSERS.get(expression_type) 1274 if not parser: 1275 raise TypeError(f"No parser registered for {expression_type}") 1276 1277 try: 1278 return self._parse(parser, raw_tokens, sql) 1279 except ParseError as e: 1280 e.errors[0]["into_expression"] = expression_type 1281 errors.append(e) 1282 1283 raise ParseError( 1284 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1285 errors=merge_errors(errors), 1286 ) from errors[-1] 1287 1288 def _parse( 1289 self, 1290 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1291 raw_tokens: t.List[Token], 1292 sql: t.Optional[str] = None, 1293 ) -> t.List[t.Optional[exp.Expression]]: 1294 self.reset() 1295 self.sql = sql or "" 1296 1297 total = len(raw_tokens) 1298 chunks: t.List[t.List[Token]] = [[]] 1299 1300 for i, token in enumerate(raw_tokens): 1301 if token.token_type == TokenType.SEMICOLON: 1302 if token.comments: 1303 chunks.append([token]) 1304 1305 if i < total - 1: 1306 chunks.append([]) 1307 else: 1308 chunks[-1].append(token) 1309 1310 expressions = [] 1311 1312 for tokens in chunks: 1313 self._index = -1 1314 self._tokens = tokens 1315 self._advance() 1316 1317 expressions.append(parse_method(self)) 1318 1319 if self._index < len(self._tokens): 1320 self.raise_error("Invalid expression / Unexpected token") 1321 1322 self.check_errors() 1323 1324 return expressions 1325 1326 def check_errors(self) -> None: 1327 """Logs or raises any found errors, depending on the chosen error level setting.""" 1328 if self.error_level == ErrorLevel.WARN: 1329 for error in self.errors: 1330 logger.error(str(error)) 1331 elif self.error_level == ErrorLevel.RAISE and self.errors: 1332 raise ParseError( 1333 concat_messages(self.errors, self.max_errors), 1334 errors=merge_errors(self.errors), 1335 ) 1336 1337 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1338 """ 1339 Appends an error in the list of recorded errors or raises it, depending on the chosen 1340 error level setting. 1341 """ 1342 token = token or self._curr or self._prev or Token.string("") 1343 start = token.start 1344 end = token.end + 1 1345 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1346 highlight = self.sql[start:end] 1347 end_context = self.sql[end : end + self.error_message_context] 1348 1349 error = ParseError.new( 1350 f"{message}. Line {token.line}, Col: {token.col}.\n" 1351 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1352 description=message, 1353 line=token.line, 1354 col=token.col, 1355 start_context=start_context, 1356 highlight=highlight, 1357 end_context=end_context, 1358 ) 1359 1360 if self.error_level == ErrorLevel.IMMEDIATE: 1361 raise error 1362 1363 self.errors.append(error) 1364 1365 def expression( 1366 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1367 ) -> E: 1368 """ 1369 Creates a new, validated Expression. 1370 1371 Args: 1372 exp_class: The expression class to instantiate. 1373 comments: An optional list of comments to attach to the expression. 1374 kwargs: The arguments to set for the expression along with their respective values. 1375 1376 Returns: 1377 The target expression. 
1378 """ 1379 instance = exp_class(**kwargs) 1380 instance.add_comments(comments) if comments else self._add_comments(instance) 1381 return self.validate_expression(instance) 1382 1383 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1384 if expression and self._prev_comments: 1385 expression.add_comments(self._prev_comments) 1386 self._prev_comments = None 1387 1388 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1389 """ 1390 Validates an Expression, making sure that all its mandatory arguments are set. 1391 1392 Args: 1393 expression: The expression to validate. 1394 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1395 1396 Returns: 1397 The validated expression. 1398 """ 1399 if self.error_level != ErrorLevel.IGNORE: 1400 for error_message in expression.error_messages(args): 1401 self.raise_error(error_message) 1402 1403 return expression 1404 1405 def _find_sql(self, start: Token, end: Token) -> str: 1406 return self.sql[start.start : end.end + 1] 1407 1408 def _is_connected(self) -> bool: 1409 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1410 1411 def _advance(self, times: int = 1) -> None: 1412 self._index += times 1413 self._curr = seq_get(self._tokens, self._index) 1414 self._next = seq_get(self._tokens, self._index + 1) 1415 1416 if self._index > 0: 1417 self._prev = self._tokens[self._index - 1] 1418 self._prev_comments = self._prev.comments 1419 else: 1420 self._prev = None 1421 self._prev_comments = None 1422 1423 def _retreat(self, index: int) -> None: 1424 if index != self._index: 1425 self._advance(index - self._index) 1426 1427 def _warn_unsupported(self) -> None: 1428 if len(self._tokens) <= 1: 1429 return 1430 1431 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1432 # interested in emitting a warning for the one being currently processed. 1433 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1434 1435 logger.warning( 1436 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1437 ) 1438 1439 def _parse_command(self) -> exp.Command: 1440 self._warn_unsupported() 1441 return self.expression( 1442 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1443 ) 1444 1445 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1446 """ 1447 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1660 expression = self._parse_string() 1661 extend_props(self._parse_properties()) 1662 else: 1663 expression = self._parse_statement() 1664 1665 end = self._match_text_seq("END") 1666 1667 if return_: 1668 expression = self.expression(exp.Return, this=expression) 1669 elif create_token.token_type == TokenType.INDEX: 1670 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1671 if not self._match(TokenType.ON): 1672 index = self._parse_id_var() 1673 anonymous = False 1674 else: 1675 index = None 1676 anonymous = True 1677 1678 this = self._parse_index(index=index, anonymous=anonymous) 1679 elif create_token.token_type in self.DB_CREATABLES: 1680 table_parts = self._parse_table_parts( 1681 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1682 ) 1683 1684 # exp.Properties.Location.POST_NAME 1685 self._match(TokenType.COMMA) 1686 extend_props(self._parse_properties(before=True)) 1687 1688 this = self._parse_schema(this=table_parts) 1689 1690 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1691 extend_props(self._parse_properties()) 1692 1693 self._match(TokenType.ALIAS) 1694 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1695 # exp.Properties.Location.POST_ALIAS 1696 extend_props(self._parse_properties()) 1697 1698 if create_token.token_type == TokenType.SEQUENCE: 1699 expression = self._parse_types() 1700 extend_props(self._parse_properties()) 1701 else: 1702 expression = self._parse_ddl_select() 1703 1704 if create_token.token_type == TokenType.TABLE: 1705 # exp.Properties.Location.POST_EXPRESSION 1706 extend_props(self._parse_properties()) 1707 1708 indexes = [] 1709 while True: 1710 index = self._parse_index() 1711 1712 # exp.Properties.Location.POST_INDEX 1713 extend_props(self._parse_properties()) 1714 1715 if not index: 1716 break 1717 else: 1718 self._match(TokenType.COMMA) 1719 indexes.append(index) 1720 elif create_token.token_type == TokenType.VIEW: 1721 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1722 no_schema_binding = True 1723 1724 shallow = self._match_text_seq("SHALLOW") 1725 1726 if self._match_texts(self.CLONE_KEYWORDS): 1727 copy = self._prev.text.lower() == "copy" 1728 clone = self.expression( 1729 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1730 ) 1731 1732 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1733 return self._parse_as_command(start) 1734 1735 return self.expression( 1736 exp.Create, 1737 comments=comments, 1738 this=this, 1739 kind=create_token.text.upper(), 1740 replace=replace, 1741 unique=unique, 1742 expression=expression, 1743 exists=exists, 1744 properties=properties, 1745 indexes=indexes, 1746 no_schema_binding=no_schema_binding, 1747 begin=begin, 1748 end=end, 1749 clone=clone, 1750 ) 1751 1752 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1753 seq = exp.SequenceProperties() 1754 1755 options = [] 1756 index = self._index 1757 1758 while self._curr: 1759 self._match(TokenType.COMMA) 1760 if self._match_text_seq("INCREMENT"): 1761 self._match_text_seq("BY") 1762 self._match_text_seq("=") 1763 seq.set("increment", self._parse_term()) 1764 elif self._match_text_seq("MINVALUE"): 1765 seq.set("minvalue", self._parse_term()) 1766 elif self._match_text_seq("MAXVALUE"): 1767 seq.set("maxvalue", self._parse_term()) 1768 elif self._match(TokenType.START_WITH) or 
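# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# _parse_create above returns an exp.Create whose args mirror the
# clauses it consumed, e.g.:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (a INT)")
#     assert isinstance(ddl, exp.Create)
#     assert ddl.args["kind"] == "TABLE" and ddl.args["exists"] is True
# ------------------------------------------------------------------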
self._match_text_seq("START"): 1769 self._match_text_seq("=") 1770 seq.set("start", self._parse_term()) 1771 elif self._match_text_seq("CACHE"): 1772 # T-SQL allows empty CACHE which is initialized dynamically 1773 seq.set("cache", self._parse_number() or True) 1774 elif self._match_text_seq("OWNED", "BY"): 1775 # "OWNED BY NONE" is the default 1776 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1777 else: 1778 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1779 if opt: 1780 options.append(opt) 1781 else: 1782 break 1783 1784 seq.set("options", options if options else None) 1785 return None if self._index == index else seq 1786 1787 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1788 # only used for teradata currently 1789 self._match(TokenType.COMMA) 1790 1791 kwargs = { 1792 "no": self._match_text_seq("NO"), 1793 "dual": self._match_text_seq("DUAL"), 1794 "before": self._match_text_seq("BEFORE"), 1795 "default": self._match_text_seq("DEFAULT"), 1796 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1797 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1798 "after": self._match_text_seq("AFTER"), 1799 "minimum": self._match_texts(("MIN", "MINIMUM")), 1800 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1801 } 1802 1803 if self._match_texts(self.PROPERTY_PARSERS): 1804 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1805 try: 1806 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1807 except TypeError: 1808 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1809 1810 return None 1811 1812 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1813 return self._parse_wrapped_csv(self._parse_property) 1814 1815 def _parse_property(self) -> t.Optional[exp.Expression]: 1816 if self._match_texts(self.PROPERTY_PARSERS): 1817 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1818 1819 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1820 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1821 1822 if self._match_text_seq("COMPOUND", "SORTKEY"): 1823 return self._parse_sortkey(compound=True) 1824 1825 if self._match_text_seq("SQL", "SECURITY"): 1826 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1827 1828 index = self._index 1829 key = self._parse_column() 1830 1831 if not self._match(TokenType.EQ): 1832 self._retreat(index) 1833 return self._parse_sequence_properties() 1834 1835 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1836 if isinstance(key, exp.Column): 1837 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1838 1839 value = self._parse_bitwise() or self._parse_var(any_token=True) 1840 1841 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1842 if isinstance(value, exp.Column): 1843 value = exp.var(value.name) 1844 1845 return self.expression(exp.Property, this=key, value=value) 1846 1847 def _parse_stored(self) -> exp.FileFormatProperty: 1848 self._match(TokenType.ALIAS) 1849 1850 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1851 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1852 1853 return self.expression( 1854 exp.FileFormatProperty, 1855 this=( 1856 self.expression( 1857 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1858 ) 1859 if 
input_format or output_format 1860 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1861 ), 1862 ) 1863 1864 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1865 field = self._parse_field() 1866 if isinstance(field, exp.Identifier) and not field.quoted: 1867 field = exp.var(field) 1868 1869 return field 1870 1871 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1872 self._match(TokenType.EQ) 1873 self._match(TokenType.ALIAS) 1874 1875 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1876 1877 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1878 properties = [] 1879 while True: 1880 if before: 1881 prop = self._parse_property_before() 1882 else: 1883 prop = self._parse_property() 1884 if not prop: 1885 break 1886 for p in ensure_list(prop): 1887 properties.append(p) 1888 1889 if properties: 1890 return self.expression(exp.Properties, expressions=properties) 1891 1892 return None 1893 1894 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1895 return self.expression( 1896 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1897 ) 1898 1899 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1900 if self._index >= 2: 1901 pre_volatile_token = self._tokens[self._index - 2] 1902 else: 1903 pre_volatile_token = None 1904 1905 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1906 return exp.VolatileProperty() 1907 1908 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1909 1910 def _parse_retention_period(self) -> exp.Var: 1911 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1912 number = self._parse_number() 1913 number_str = f"{number} " if number else "" 1914 unit = self._parse_var(any_token=True) 1915 return exp.var(f"{number_str}{unit}") 1916 1917 def _parse_system_versioning_property( 1918 self, with_: bool = False 1919 ) -> exp.WithSystemVersioningProperty: 1920 self._match(TokenType.EQ) 1921 prop = self.expression( 1922 exp.WithSystemVersioningProperty, 1923 **{ # type: ignore 1924 "on": True, 1925 "with": with_, 1926 }, 1927 ) 1928 1929 if self._match_text_seq("OFF"): 1930 prop.set("on", False) 1931 return prop 1932 1933 self._match(TokenType.ON) 1934 if self._match(TokenType.L_PAREN): 1935 while self._curr and not self._match(TokenType.R_PAREN): 1936 if self._match_text_seq("HISTORY_TABLE", "="): 1937 prop.set("this", self._parse_table_parts()) 1938 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1939 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1940 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1941 prop.set("retention_period", self._parse_retention_period()) 1942 1943 self._match(TokenType.COMMA) 1944 1945 return prop 1946 1947 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1948 self._match(TokenType.EQ) 1949 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1950 prop = self.expression(exp.DataDeletionProperty, on=on) 1951 1952 if self._match(TokenType.L_PAREN): 1953 while self._curr and not self._match(TokenType.R_PAREN): 1954 if self._match_text_seq("FILTER_COLUMN", "="): 1955 prop.set("filter_column", self._parse_column()) 1956 elif self._match_text_seq("RETENTION_PERIOD", "="): 1957 prop.set("retention_period", self._parse_retention_period()) 1958 1959 
self._match(TokenType.COMMA) 1960 1961 return prop 1962 1963 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1964 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1965 prop = self._parse_system_versioning_property(with_=True) 1966 self._match_r_paren() 1967 return prop 1968 1969 if self._match(TokenType.L_PAREN, advance=False): 1970 return self._parse_wrapped_properties() 1971 1972 if self._match_text_seq("JOURNAL"): 1973 return self._parse_withjournaltable() 1974 1975 if self._match_texts(self.VIEW_ATTRIBUTES): 1976 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1977 1978 if self._match_text_seq("DATA"): 1979 return self._parse_withdata(no=False) 1980 elif self._match_text_seq("NO", "DATA"): 1981 return self._parse_withdata(no=True) 1982 1983 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1984 return self._parse_serde_properties(with_=True) 1985 1986 if not self._next: 1987 return None 1988 1989 return self._parse_withisolatedloading() 1990 1991 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1992 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1993 self._match(TokenType.EQ) 1994 1995 user = self._parse_id_var() 1996 self._match(TokenType.PARAMETER) 1997 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1998 1999 if not user or not host: 2000 return None 2001 2002 return exp.DefinerProperty(this=f"{user}@{host}") 2003 2004 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2005 self._match(TokenType.TABLE) 2006 self._match(TokenType.EQ) 2007 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2008 2009 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2010 return self.expression(exp.LogProperty, no=no) 2011 2012 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2013 return self.expression(exp.JournalProperty, **kwargs) 2014 2015 def _parse_checksum(self) -> exp.ChecksumProperty: 2016 self._match(TokenType.EQ) 2017 2018 on = None 2019 if self._match(TokenType.ON): 2020 on = True 2021 elif self._match_text_seq("OFF"): 2022 on = False 2023 2024 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2025 2026 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2027 return self.expression( 2028 exp.Cluster, 2029 expressions=( 2030 self._parse_wrapped_csv(self._parse_ordered) 2031 if wrapped 2032 else self._parse_csv(self._parse_ordered) 2033 ), 2034 ) 2035 2036 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2037 self._match_text_seq("BY") 2038 2039 self._match_l_paren() 2040 expressions = self._parse_csv(self._parse_column) 2041 self._match_r_paren() 2042 2043 if self._match_text_seq("SORTED", "BY"): 2044 self._match_l_paren() 2045 sorted_by = self._parse_csv(self._parse_ordered) 2046 self._match_r_paren() 2047 else: 2048 sorted_by = None 2049 2050 self._match(TokenType.INTO) 2051 buckets = self._parse_number() 2052 self._match_text_seq("BUCKETS") 2053 2054 return self.expression( 2055 exp.ClusteredByProperty, 2056 expressions=expressions, 2057 sorted_by=sorted_by, 2058 buckets=buckets, 2059 ) 2060 2061 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2062 if not self._match_text_seq("GRANTS"): 2063 self._retreat(self._index - 1) 2064 return None 2065 2066 return self.expression(exp.CopyGrantsProperty) 2067 2068 def _parse_freespace(self) -> exp.FreespaceProperty: 2069 self._match(TokenType.EQ) 2070 return 
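# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# _parse_definer above folds the user and host parts into a single
# "user@host" string, e.g. with the MySQL dialect:
#
#     import sqlglot
#     from sqlglot import exp
#
#     view = sqlglot.parse_one(
#         "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
#     )
#     definer = view.find(exp.DefinerProperty)
#     assert definer is not None and definer.name == "admin@localhost"
# ------------------------------------------------------------------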
self.expression( 2071 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2072 ) 2073 2074 def _parse_mergeblockratio( 2075 self, no: bool = False, default: bool = False 2076 ) -> exp.MergeBlockRatioProperty: 2077 if self._match(TokenType.EQ): 2078 return self.expression( 2079 exp.MergeBlockRatioProperty, 2080 this=self._parse_number(), 2081 percent=self._match(TokenType.PERCENT), 2082 ) 2083 2084 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2085 2086 def _parse_datablocksize( 2087 self, 2088 default: t.Optional[bool] = None, 2089 minimum: t.Optional[bool] = None, 2090 maximum: t.Optional[bool] = None, 2091 ) -> exp.DataBlocksizeProperty: 2092 self._match(TokenType.EQ) 2093 size = self._parse_number() 2094 2095 units = None 2096 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2097 units = self._prev.text 2098 2099 return self.expression( 2100 exp.DataBlocksizeProperty, 2101 size=size, 2102 units=units, 2103 default=default, 2104 minimum=minimum, 2105 maximum=maximum, 2106 ) 2107 2108 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2109 self._match(TokenType.EQ) 2110 always = self._match_text_seq("ALWAYS") 2111 manual = self._match_text_seq("MANUAL") 2112 never = self._match_text_seq("NEVER") 2113 default = self._match_text_seq("DEFAULT") 2114 2115 autotemp = None 2116 if self._match_text_seq("AUTOTEMP"): 2117 autotemp = self._parse_schema() 2118 2119 return self.expression( 2120 exp.BlockCompressionProperty, 2121 always=always, 2122 manual=manual, 2123 never=never, 2124 default=default, 2125 autotemp=autotemp, 2126 ) 2127 2128 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2129 index = self._index 2130 no = self._match_text_seq("NO") 2131 concurrent = self._match_text_seq("CONCURRENT") 2132 2133 if not self._match_text_seq("ISOLATED", "LOADING"): 2134 self._retreat(index) 2135 return None 2136 2137 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2138 return self.expression( 2139 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2140 ) 2141 2142 def _parse_locking(self) -> exp.LockingProperty: 2143 if self._match(TokenType.TABLE): 2144 kind = "TABLE" 2145 elif self._match(TokenType.VIEW): 2146 kind = "VIEW" 2147 elif self._match(TokenType.ROW): 2148 kind = "ROW" 2149 elif self._match_text_seq("DATABASE"): 2150 kind = "DATABASE" 2151 else: 2152 kind = None 2153 2154 if kind in ("DATABASE", "TABLE", "VIEW"): 2155 this = self._parse_table_parts() 2156 else: 2157 this = None 2158 2159 if self._match(TokenType.FOR): 2160 for_or_in = "FOR" 2161 elif self._match(TokenType.IN): 2162 for_or_in = "IN" 2163 else: 2164 for_or_in = None 2165 2166 if self._match_text_seq("ACCESS"): 2167 lock_type = "ACCESS" 2168 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2169 lock_type = "EXCLUSIVE" 2170 elif self._match_text_seq("SHARE"): 2171 lock_type = "SHARE" 2172 elif self._match_text_seq("READ"): 2173 lock_type = "READ" 2174 elif self._match_text_seq("WRITE"): 2175 lock_type = "WRITE" 2176 elif self._match_text_seq("CHECKSUM"): 2177 lock_type = "CHECKSUM" 2178 else: 2179 lock_type = None 2180 2181 override = self._match_text_seq("OVERRIDE") 2182 2183 return self.expression( 2184 exp.LockingProperty, 2185 this=this, 2186 kind=kind, 2187 for_or_in=for_or_in, 2188 lock_type=lock_type, 2189 override=override, 2190 ) 2191 2192 def _parse_partition_by(self) -> t.List[exp.Expression]: 2193 if 
self._match(TokenType.PARTITION_BY): 2194 return self._parse_csv(self._parse_conjunction) 2195 return [] 2196 2197 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2198 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2199 if self._match_text_seq("MINVALUE"): 2200 return exp.var("MINVALUE") 2201 if self._match_text_seq("MAXVALUE"): 2202 return exp.var("MAXVALUE") 2203 return self._parse_bitwise() 2204 2205 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2206 expression = None 2207 from_expressions = None 2208 to_expressions = None 2209 2210 if self._match(TokenType.IN): 2211 this = self._parse_wrapped_csv(self._parse_bitwise) 2212 elif self._match(TokenType.FROM): 2213 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2214 self._match_text_seq("TO") 2215 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2216 elif self._match_text_seq("WITH", "(", "MODULUS"): 2217 this = self._parse_number() 2218 self._match_text_seq(",", "REMAINDER") 2219 expression = self._parse_number() 2220 self._match_r_paren() 2221 else: 2222 self.raise_error("Failed to parse partition bound spec.") 2223 2224 return self.expression( 2225 exp.PartitionBoundSpec, 2226 this=this, 2227 expression=expression, 2228 from_expressions=from_expressions, 2229 to_expressions=to_expressions, 2230 ) 2231 2232 # https://www.postgresql.org/docs/current/sql-createtable.html 2233 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2234 if not self._match_text_seq("OF"): 2235 self._retreat(self._index - 1) 2236 return None 2237 2238 this = self._parse_table(schema=True) 2239 2240 if self._match(TokenType.DEFAULT): 2241 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2242 elif self._match_text_seq("FOR", "VALUES"): 2243 expression = self._parse_partition_bound_spec() 2244 else: 2245 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2246 2247 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2248 2249 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2250 self._match(TokenType.EQ) 2251 return self.expression( 2252 exp.PartitionedByProperty, 2253 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2254 ) 2255 2256 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2257 if self._match_text_seq("AND", "STATISTICS"): 2258 statistics = True 2259 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2260 statistics = False 2261 else: 2262 statistics = None 2263 2264 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2265 2266 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2267 if self._match_text_seq("SQL"): 2268 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2269 return None 2270 2271 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2272 if self._match_text_seq("SQL", "DATA"): 2273 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2274 return None 2275 2276 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2277 if self._match_text_seq("PRIMARY", "INDEX"): 2278 return exp.NoPrimaryIndexProperty() 2279 if self._match_text_seq("SQL"): 2280 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2281 return None 2282 2283 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2284 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2285 return exp.OnCommitProperty() 
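# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# The Postgres partition clauses handled above are exposed as
# PartitionedOfProperty / PartitionBoundSpec nodes, e.g.:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = sqlglot.parse_one(
#         "CREATE TABLE p1 PARTITION OF parent FOR VALUES FROM (1) TO (10)",
#         read="postgres",
#     )
#     assert ddl.find(exp.PartitionBoundSpec) is not None
# ------------------------------------------------------------------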
2286 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2287 return exp.OnCommitProperty(delete=True) 2288 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2289 2290 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2291 if self._match_text_seq("SQL", "DATA"): 2292 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2293 return None 2294 2295 def _parse_distkey(self) -> exp.DistKeyProperty: 2296 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2297 2298 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2299 table = self._parse_table(schema=True) 2300 2301 options = [] 2302 while self._match_texts(("INCLUDING", "EXCLUDING")): 2303 this = self._prev.text.upper() 2304 2305 id_var = self._parse_id_var() 2306 if not id_var: 2307 return None 2308 2309 options.append( 2310 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2311 ) 2312 2313 return self.expression(exp.LikeProperty, this=table, expressions=options) 2314 2315 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2316 return self.expression( 2317 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2318 ) 2319 2320 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2321 self._match(TokenType.EQ) 2322 return self.expression( 2323 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2324 ) 2325 2326 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2327 self._match_text_seq("WITH", "CONNECTION") 2328 return self.expression( 2329 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2330 ) 2331 2332 def _parse_returns(self) -> exp.ReturnsProperty: 2333 value: t.Optional[exp.Expression] 2334 null = None 2335 is_table = self._match(TokenType.TABLE) 2336 2337 if is_table: 2338 if self._match(TokenType.LT): 2339 value = self.expression( 2340 exp.Schema, 2341 this="TABLE", 2342 expressions=self._parse_csv(self._parse_struct_types), 2343 ) 2344 if not self._match(TokenType.GT): 2345 self.raise_error("Expecting >") 2346 else: 2347 value = self._parse_schema(exp.var("TABLE")) 2348 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2349 null = True 2350 value = None 2351 else: 2352 value = self._parse_types() 2353 2354 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2355 2356 def _parse_describe(self) -> exp.Describe: 2357 kind = self._match_set(self.CREATABLES) and self._prev.text 2358 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2359 if self._match(TokenType.DOT): 2360 style = None 2361 self._retreat(self._index - 2) 2362 this = self._parse_table(schema=True) 2363 properties = self._parse_properties() 2364 expressions = properties.expressions if properties else None 2365 return self.expression( 2366 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2367 ) 2368 2369 def _parse_insert(self) -> exp.Insert: 2370 comments = ensure_list(self._prev_comments) 2371 hint = self._parse_hint() 2372 overwrite = self._match(TokenType.OVERWRITE) 2373 ignore = self._match(TokenType.IGNORE) 2374 local = self._match_text_seq("LOCAL") 2375 alternative = None 2376 is_function = None 2377 2378 if self._match_text_seq("DIRECTORY"): 2379 this: t.Optional[exp.Expression] = self.expression( 2380 exp.Directory, 2381 this=self._parse_var_or_string(), 2382 
local=local, 2383 row_format=self._parse_row_format(match_row=True), 2384 ) 2385 else: 2386 if self._match(TokenType.OR): 2387 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2388 2389 self._match(TokenType.INTO) 2390 comments += ensure_list(self._prev_comments) 2391 self._match(TokenType.TABLE) 2392 is_function = self._match(TokenType.FUNCTION) 2393 2394 this = ( 2395 self._parse_table(schema=True, parse_partition=True) 2396 if not is_function 2397 else self._parse_function() 2398 ) 2399 2400 returning = self._parse_returning() 2401 2402 return self.expression( 2403 exp.Insert, 2404 comments=comments, 2405 hint=hint, 2406 is_function=is_function, 2407 this=this, 2408 stored=self._match_text_seq("STORED") and self._parse_stored(), 2409 by_name=self._match_text_seq("BY", "NAME"), 2410 exists=self._parse_exists(), 2411 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2412 and self._parse_conjunction(), 2413 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2414 conflict=self._parse_on_conflict(), 2415 returning=returning or self._parse_returning(), 2416 overwrite=overwrite, 2417 alternative=alternative, 2418 ignore=ignore, 2419 ) 2420 2421 def _parse_kill(self) -> exp.Kill: 2422 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2423 2424 return self.expression( 2425 exp.Kill, 2426 this=self._parse_primary(), 2427 kind=kind, 2428 ) 2429 2430 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2431 conflict = self._match_text_seq("ON", "CONFLICT") 2432 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2433 2434 if not conflict and not duplicate: 2435 return None 2436 2437 conflict_keys = None 2438 constraint = None 2439 2440 if conflict: 2441 if self._match_text_seq("ON", "CONSTRAINT"): 2442 constraint = self._parse_id_var() 2443 elif self._match(TokenType.L_PAREN): 2444 conflict_keys = self._parse_csv(self._parse_id_var) 2445 self._match_r_paren() 2446 2447 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2448 if self._prev.token_type == TokenType.UPDATE: 2449 self._match(TokenType.SET) 2450 expressions = self._parse_csv(self._parse_equality) 2451 else: 2452 expressions = None 2453 2454 return self.expression( 2455 exp.OnConflict, 2456 duplicate=duplicate, 2457 expressions=expressions, 2458 action=action, 2459 conflict_keys=conflict_keys, 2460 constraint=constraint, 2461 ) 2462 2463 def _parse_returning(self) -> t.Optional[exp.Returning]: 2464 if not self._match(TokenType.RETURNING): 2465 return None 2466 return self.expression( 2467 exp.Returning, 2468 expressions=self._parse_csv(self._parse_expression), 2469 into=self._match(TokenType.INTO) and self._parse_table_part(), 2470 ) 2471 2472 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2473 if not self._match(TokenType.FORMAT): 2474 return None 2475 return self._parse_row_format() 2476 2477 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2478 index = self._index 2479 with_ = with_ or self._match_text_seq("WITH") 2480 2481 if not self._match(TokenType.SERDE_PROPERTIES): 2482 self._retreat(index) 2483 return None 2484 return self.expression( 2485 exp.SerdeProperties, 2486 **{ # type: ignore 2487 "expressions": self._parse_wrapped_properties(), 2488 "with": with_, 2489 }, 2490 ) 2491 2492 def _parse_row_format( 2493 self, match_row: bool = False 2494 ) -> t.Optional[exp.RowFormatSerdeProperty | 
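# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# _parse_insert and _parse_on_conflict above cooperate on upserts, e.g.:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ins = sqlglot.parse_one(
#         "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
#         read="postgres",
#     )
#     assert isinstance(ins, exp.Insert)
#     assert isinstance(ins.args["conflict"], exp.OnConflict)
# ------------------------------------------------------------------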
exp.RowFormatDelimitedProperty]: 2495 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2496 return None 2497 2498 if self._match_text_seq("SERDE"): 2499 this = self._parse_string() 2500 2501 serde_properties = self._parse_serde_properties() 2502 2503 return self.expression( 2504 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2505 ) 2506 2507 self._match_text_seq("DELIMITED") 2508 2509 kwargs = {} 2510 2511 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2512 kwargs["fields"] = self._parse_string() 2513 if self._match_text_seq("ESCAPED", "BY"): 2514 kwargs["escaped"] = self._parse_string() 2515 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2516 kwargs["collection_items"] = self._parse_string() 2517 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2518 kwargs["map_keys"] = self._parse_string() 2519 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2520 kwargs["lines"] = self._parse_string() 2521 if self._match_text_seq("NULL", "DEFINED", "AS"): 2522 kwargs["null"] = self._parse_string() 2523 2524 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2525 2526 def _parse_load(self) -> exp.LoadData | exp.Command: 2527 if self._match_text_seq("DATA"): 2528 local = self._match_text_seq("LOCAL") 2529 self._match_text_seq("INPATH") 2530 inpath = self._parse_string() 2531 overwrite = self._match(TokenType.OVERWRITE) 2532 self._match_pair(TokenType.INTO, TokenType.TABLE) 2533 2534 return self.expression( 2535 exp.LoadData, 2536 this=self._parse_table(schema=True), 2537 local=local, 2538 overwrite=overwrite, 2539 inpath=inpath, 2540 partition=self._parse_partition(), 2541 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2542 serde=self._match_text_seq("SERDE") and self._parse_string(), 2543 ) 2544 return self._parse_as_command(self._prev) 2545 2546 def _parse_delete(self) -> exp.Delete: 2547 # This handles MySQL's "Multiple-Table Syntax" 2548 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2549 tables = None 2550 comments = self._prev_comments 2551 if not self._match(TokenType.FROM, advance=False): 2552 tables = self._parse_csv(self._parse_table) or None 2553 2554 returning = self._parse_returning() 2555 2556 return self.expression( 2557 exp.Delete, 2558 comments=comments, 2559 tables=tables, 2560 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2561 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2562 where=self._parse_where(), 2563 returning=returning or self._parse_returning(), 2564 limit=self._parse_limit(), 2565 ) 2566 2567 def _parse_update(self) -> exp.Update: 2568 comments = self._prev_comments 2569 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2570 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2571 returning = self._parse_returning() 2572 return self.expression( 2573 exp.Update, 2574 comments=comments, 2575 **{ # type: ignore 2576 "this": this, 2577 "expressions": expressions, 2578 "from": self._parse_from(joins=True), 2579 "where": self._parse_where(), 2580 "returning": returning or self._parse_returning(), 2581 "order": self._parse_order(), 2582 "limit": self._parse_limit(), 2583 }, 2584 ) 2585 2586 def _parse_uncache(self) -> exp.Uncache: 2587 if not self._match(TokenType.TABLE): 2588 self.raise_error("Expecting TABLE after UNCACHE") 2589 2590 return self.expression( 2591 exp.Uncache, exists=self._parse_exists(), 
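# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# _parse_delete above also consumes an optional RETURNING clause, e.g.:
#
#     import sqlglot
#     from sqlglot import exp
#
#     stmt = sqlglot.parse_one(
#         "DELETE FROM t WHERE x > 1 RETURNING id", read="postgres"
#     )
#     assert isinstance(stmt, exp.Delete)
#     assert stmt.args["returning"] is not None
# ------------------------------------------------------------------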
this=self._parse_table(schema=True) 2592 ) 2593 2594 def _parse_cache(self) -> exp.Cache: 2595 lazy = self._match_text_seq("LAZY") 2596 self._match(TokenType.TABLE) 2597 table = self._parse_table(schema=True) 2598 2599 options = [] 2600 if self._match_text_seq("OPTIONS"): 2601 self._match_l_paren() 2602 k = self._parse_string() 2603 self._match(TokenType.EQ) 2604 v = self._parse_string() 2605 options = [k, v] 2606 self._match_r_paren() 2607 2608 self._match(TokenType.ALIAS) 2609 return self.expression( 2610 exp.Cache, 2611 this=table, 2612 lazy=lazy, 2613 options=options, 2614 expression=self._parse_select(nested=True), 2615 ) 2616 2617 def _parse_partition(self) -> t.Optional[exp.Partition]: 2618 if not self._match(TokenType.PARTITION): 2619 return None 2620 2621 return self.expression( 2622 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2623 ) 2624 2625 def _parse_value(self) -> t.Optional[exp.Tuple]: 2626 if self._match(TokenType.L_PAREN): 2627 expressions = self._parse_csv(self._parse_expression) 2628 self._match_r_paren() 2629 return self.expression(exp.Tuple, expressions=expressions) 2630 2631 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2632 expression = self._parse_expression() 2633 if expression: 2634 return self.expression(exp.Tuple, expressions=[expression]) 2635 return None 2636 2637 def _parse_projections(self) -> t.List[exp.Expression]: 2638 return self._parse_expressions() 2639 2640 def _parse_select( 2641 self, 2642 nested: bool = False, 2643 table: bool = False, 2644 parse_subquery_alias: bool = True, 2645 parse_set_operation: bool = True, 2646 ) -> t.Optional[exp.Expression]: 2647 cte = self._parse_with() 2648 2649 if cte: 2650 this = self._parse_statement() 2651 2652 if not this: 2653 self.raise_error("Failed to parse any statement following CTE") 2654 return cte 2655 2656 if "with" in this.arg_types: 2657 this.set("with", cte) 2658 else: 2659 self.raise_error(f"{this.key} does not support CTE") 2660 this = cte 2661 2662 return this 2663 2664 # duckdb supports leading with FROM x 2665 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2666 2667 if self._match(TokenType.SELECT): 2668 comments = self._prev_comments 2669 2670 hint = self._parse_hint() 2671 all_ = self._match(TokenType.ALL) 2672 distinct = self._match_set(self.DISTINCT_TOKENS) 2673 2674 kind = ( 2675 self._match(TokenType.ALIAS) 2676 and self._match_texts(("STRUCT", "VALUE")) 2677 and self._prev.text.upper() 2678 ) 2679 2680 if distinct: 2681 distinct = self.expression( 2682 exp.Distinct, 2683 on=self._parse_value() if self._match(TokenType.ON) else None, 2684 ) 2685 2686 if all_ and distinct: 2687 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2688 2689 limit = self._parse_limit(top=True) 2690 projections = self._parse_projections() 2691 2692 this = self.expression( 2693 exp.Select, 2694 kind=kind, 2695 hint=hint, 2696 distinct=distinct, 2697 expressions=projections, 2698 limit=limit, 2699 ) 2700 this.comments = comments 2701 2702 into = self._parse_into() 2703 if into: 2704 this.set("into", into) 2705 2706 if not from_: 2707 from_ = self._parse_from() 2708 2709 if from_: 2710 this.set("from", from_) 2711 2712 this = self._parse_query_modifiers(this) 2713 elif (table or nested) and self._match(TokenType.L_PAREN): 2714 if self._match(TokenType.PIVOT): 2715 this = self._parse_simplified_pivot() 2716 elif self._match(TokenType.FROM): 2717 this = exp.select("*").from_( 2718 t.cast(exp.From, 
self._parse_from(skip_from_token=True)) 2719 ) 2720 else: 2721 this = ( 2722 self._parse_table() 2723 if table 2724 else self._parse_select(nested=True, parse_set_operation=False) 2725 ) 2726 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2727 2728 self._match_r_paren() 2729 2730 # We return early here so that the UNION isn't attached to the subquery by the 2731 # following call to _parse_set_operations, but instead becomes the parent node 2732 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2733 elif self._match(TokenType.VALUES, advance=False): 2734 this = self._parse_derived_table_values() 2735 elif from_: 2736 this = exp.select("*").from_(from_.this, copy=False) 2737 else: 2738 this = None 2739 2740 if parse_set_operation: 2741 return self._parse_set_operations(this) 2742 return this 2743 2744 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2745 if not skip_with_token and not self._match(TokenType.WITH): 2746 return None 2747 2748 comments = self._prev_comments 2749 recursive = self._match(TokenType.RECURSIVE) 2750 2751 expressions = [] 2752 while True: 2753 expressions.append(self._parse_cte()) 2754 2755 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2756 break 2757 else: 2758 self._match(TokenType.WITH) 2759 2760 return self.expression( 2761 exp.With, comments=comments, expressions=expressions, recursive=recursive 2762 ) 2763 2764 def _parse_cte(self) -> exp.CTE: 2765 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2766 if not alias or not alias.this: 2767 self.raise_error("Expected CTE to have alias") 2768 2769 self._match(TokenType.ALIAS) 2770 2771 if self._match_text_seq("NOT", "MATERIALIZED"): 2772 materialized = False 2773 elif self._match_text_seq("MATERIALIZED"): 2774 materialized = True 2775 else: 2776 materialized = None 2777 2778 return self.expression( 2779 exp.CTE, 2780 this=self._parse_wrapped(self._parse_statement), 2781 alias=alias, 2782 materialized=materialized, 2783 ) 2784 2785 def _parse_table_alias( 2786 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2787 ) -> t.Optional[exp.TableAlias]: 2788 any_token = self._match(TokenType.ALIAS) 2789 alias = ( 2790 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2791 or self._parse_string_as_identifier() 2792 ) 2793 2794 index = self._index 2795 if self._match(TokenType.L_PAREN): 2796 columns = self._parse_csv(self._parse_function_parameter) 2797 self._match_r_paren() if columns else self._retreat(index) 2798 else: 2799 columns = None 2800 2801 if not alias and not columns: 2802 return None 2803 2804 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2805 2806 # We bubble up comments from the Identifier to the TableAlias 2807 if isinstance(alias, exp.Identifier): 2808 table_alias.add_comments(alias.pop_comments()) 2809 2810 return table_alias 2811 2812 def _parse_subquery( 2813 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2814 ) -> t.Optional[exp.Subquery]: 2815 if not this: 2816 return None 2817 2818 return self.expression( 2819 exp.Subquery, 2820 this=this, 2821 pivots=self._parse_pivots(), 2822 alias=self._parse_table_alias() if parse_alias else None, 2823 ) 2824 2825 def _implicit_unnests_to_explicit(self, this: E) -> E: 2826 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2827 2828 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2829 for i, join in 
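# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# _parse_with and _parse_cte above attach CTEs to the statement that
# follows them, e.g.:
#
#     import sqlglot
#     from sqlglot import exp
#
#     q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
#     assert isinstance(q, exp.Select) and q.args["with"] is not None
# ------------------------------------------------------------------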
enumerate(this.args.get("joins") or []): 2830 table = join.this 2831 normalized_table = table.copy() 2832 normalized_table.meta["maybe_column"] = True 2833 normalized_table = _norm(normalized_table, dialect=self.dialect) 2834 2835 if isinstance(table, exp.Table) and not join.args.get("on"): 2836 if normalized_table.parts[0].name in refs: 2837 table_as_column = table.to_column() 2838 unnest = exp.Unnest(expressions=[table_as_column]) 2839 2840 # Table.to_column creates a parent Alias node that we want to convert to 2841 # a TableAlias and attach to the Unnest, so it matches the parser's output 2842 if isinstance(table.args.get("alias"), exp.TableAlias): 2843 table_as_column.replace(table_as_column.this) 2844 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2845 2846 table.replace(unnest) 2847 2848 refs.add(normalized_table.alias_or_name) 2849 2850 return this 2851 2852 def _parse_query_modifiers( 2853 self, this: t.Optional[exp.Expression] 2854 ) -> t.Optional[exp.Expression]: 2855 if isinstance(this, (exp.Query, exp.Table)): 2856 for join in self._parse_joins(): 2857 this.append("joins", join) 2858 for lateral in iter(self._parse_lateral, None): 2859 this.append("laterals", lateral) 2860 2861 while True: 2862 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2863 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2864 key, expression = parser(self) 2865 2866 if expression: 2867 this.set(key, expression) 2868 if key == "limit": 2869 offset = expression.args.pop("offset", None) 2870 2871 if offset: 2872 offset = exp.Offset(expression=offset) 2873 this.set("offset", offset) 2874 2875 limit_by_expressions = expression.expressions 2876 expression.set("expressions", None) 2877 offset.set("expressions", limit_by_expressions) 2878 continue 2879 break 2880 2881 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2882 this = self._implicit_unnests_to_explicit(this) 2883 2884 return this 2885 2886 def _parse_hint(self) -> t.Optional[exp.Hint]: 2887 if self._match(TokenType.HINT): 2888 hints = [] 2889 for hint in iter( 2890 lambda: self._parse_csv( 2891 lambda: self._parse_function() or self._parse_var(upper=True) 2892 ), 2893 [], 2894 ): 2895 hints.extend(hint) 2896 2897 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2898 self.raise_error("Expected */ after HINT") 2899 2900 return self.expression(exp.Hint, expressions=hints) 2901 2902 return None 2903 2904 def _parse_into(self) -> t.Optional[exp.Into]: 2905 if not self._match(TokenType.INTO): 2906 return None 2907 2908 temp = self._match(TokenType.TEMPORARY) 2909 unlogged = self._match_text_seq("UNLOGGED") 2910 self._match(TokenType.TABLE) 2911 2912 return self.expression( 2913 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2914 ) 2915 2916 def _parse_from( 2917 self, joins: bool = False, skip_from_token: bool = False 2918 ) -> t.Optional[exp.From]: 2919 if not skip_from_token and not self._match(TokenType.FROM): 2920 return None 2921 2922 return self.expression( 2923 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2924 ) 2925 2926 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2927 return self.expression( 2928 exp.MatchRecognizeMeasure, 2929 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2930 this=self._parse_expression(), 2931 ) 2932 2933 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2934 if not self._match(TokenType.MATCH_RECOGNIZE): 
2935 return None 2936 2937 self._match_l_paren() 2938 2939 partition = self._parse_partition_by() 2940 order = self._parse_order() 2941 2942 measures = ( 2943 self._parse_csv(self._parse_match_recognize_measure) 2944 if self._match_text_seq("MEASURES") 2945 else None 2946 ) 2947 2948 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2949 rows = exp.var("ONE ROW PER MATCH") 2950 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2951 text = "ALL ROWS PER MATCH" 2952 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2953 text += " SHOW EMPTY MATCHES" 2954 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2955 text += " OMIT EMPTY MATCHES" 2956 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2957 text += " WITH UNMATCHED ROWS" 2958 rows = exp.var(text) 2959 else: 2960 rows = None 2961 2962 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2963 text = "AFTER MATCH SKIP" 2964 if self._match_text_seq("PAST", "LAST", "ROW"): 2965 text += " PAST LAST ROW" 2966 elif self._match_text_seq("TO", "NEXT", "ROW"): 2967 text += " TO NEXT ROW" 2968 elif self._match_text_seq("TO", "FIRST"): 2969 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2970 elif self._match_text_seq("TO", "LAST"): 2971 text += f" TO LAST {self._advance_any().text}" # type: ignore 2972 after = exp.var(text) 2973 else: 2974 after = None 2975 2976 if self._match_text_seq("PATTERN"): 2977 self._match_l_paren() 2978 2979 if not self._curr: 2980 self.raise_error("Expecting )", self._curr) 2981 2982 paren = 1 2983 start = self._curr 2984 2985 while self._curr and paren > 0: 2986 if self._curr.token_type == TokenType.L_PAREN: 2987 paren += 1 2988 if self._curr.token_type == TokenType.R_PAREN: 2989 paren -= 1 2990 2991 end = self._prev 2992 self._advance() 2993 2994 if paren > 0: 2995 self.raise_error("Expecting )", self._curr) 2996 2997 pattern = exp.var(self._find_sql(start, end)) 2998 else: 2999 pattern = None 3000 3001 define = ( 3002 self._parse_csv(self._parse_name_as_expression) 3003 if self._match_text_seq("DEFINE") 3004 else None 3005 ) 3006 3007 self._match_r_paren() 3008 3009 return self.expression( 3010 exp.MatchRecognize, 3011 partition_by=partition, 3012 order=order, 3013 measures=measures, 3014 rows=rows, 3015 after=after, 3016 pattern=pattern, 3017 define=define, 3018 alias=self._parse_table_alias(), 3019 ) 3020 3021 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3022 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3023 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3024 cross_apply = False 3025 3026 if cross_apply is not None: 3027 this = self._parse_select(table=True) 3028 view = None 3029 outer = None 3030 elif self._match(TokenType.LATERAL): 3031 this = self._parse_select(table=True) 3032 view = self._match(TokenType.VIEW) 3033 outer = self._match(TokenType.OUTER) 3034 else: 3035 return None 3036 3037 if not this: 3038 this = ( 3039 self._parse_unnest() 3040 or self._parse_function() 3041 or self._parse_id_var(any_token=False) 3042 ) 3043 3044 while self._match(TokenType.DOT): 3045 this = exp.Dot( 3046 this=this, 3047 expression=self._parse_function() or self._parse_id_var(any_token=False), 3048 ) 3049 3050 if view: 3051 table = self._parse_id_var(any_token=False) 3052 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3053 table_alias: t.Optional[exp.TableAlias] = self.expression( 3054 exp.TableAlias, this=table, columns=columns 3055 ) 3056 elif isinstance(this, (exp.Subquery, exp.Unnest)) and 
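# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# A minimal MATCH_RECOGNIZE clause exercising _parse_match_recognize
# above:
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = (
#         "SELECT * FROM t MATCH_RECOGNIZE ("
#         "PARTITION BY a ORDER BY b PATTERN (x+) DEFINE x AS b > 0)"
#     )
#     assert sqlglot.parse_one(sql).find(exp.MatchRecognize) is not None
# ------------------------------------------------------------------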
this.alias: 3057 # We move the alias from the lateral's child node to the lateral itself 3058 table_alias = this.args["alias"].pop() 3059 else: 3060 table_alias = self._parse_table_alias() 3061 3062 return self.expression( 3063 exp.Lateral, 3064 this=this, 3065 view=view, 3066 outer=outer, 3067 alias=table_alias, 3068 cross_apply=cross_apply, 3069 ) 3070 3071 def _parse_join_parts( 3072 self, 3073 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3074 return ( 3075 self._match_set(self.JOIN_METHODS) and self._prev, 3076 self._match_set(self.JOIN_SIDES) and self._prev, 3077 self._match_set(self.JOIN_KINDS) and self._prev, 3078 ) 3079 3080 def _parse_join( 3081 self, skip_join_token: bool = False, parse_bracket: bool = False 3082 ) -> t.Optional[exp.Join]: 3083 if self._match(TokenType.COMMA): 3084 return self.expression(exp.Join, this=self._parse_table()) 3085 3086 index = self._index 3087 method, side, kind = self._parse_join_parts() 3088 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3089 join = self._match(TokenType.JOIN) 3090 3091 if not skip_join_token and not join: 3092 self._retreat(index) 3093 kind = None 3094 method = None 3095 side = None 3096 3097 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3098 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3099 3100 if not skip_join_token and not join and not outer_apply and not cross_apply: 3101 return None 3102 3103 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3104 3105 if method: 3106 kwargs["method"] = method.text 3107 if side: 3108 kwargs["side"] = side.text 3109 if kind: 3110 kwargs["kind"] = kind.text 3111 if hint: 3112 kwargs["hint"] = hint 3113 3114 if self._match(TokenType.MATCH_CONDITION): 3115 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3116 3117 if self._match(TokenType.ON): 3118 kwargs["on"] = self._parse_conjunction() 3119 elif self._match(TokenType.USING): 3120 kwargs["using"] = self._parse_wrapped_id_vars() 3121 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3122 kind and kind.token_type == TokenType.CROSS 3123 ): 3124 index = self._index 3125 joins: t.Optional[list] = list(self._parse_joins()) 3126 3127 if joins and self._match(TokenType.ON): 3128 kwargs["on"] = self._parse_conjunction() 3129 elif joins and self._match(TokenType.USING): 3130 kwargs["using"] = self._parse_wrapped_id_vars() 3131 else: 3132 joins = None 3133 self._retreat(index) 3134 3135 kwargs["this"].set("joins", joins if joins else None) 3136 3137 comments = [c for token in (method, side, kind) if token for c in token.comments] 3138 return self.expression(exp.Join, comments=comments, **kwargs) 3139 3140 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3141 this = self._parse_conjunction() 3142 3143 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3144 return this 3145 3146 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3147 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3148 3149 return this 3150 3151 def _parse_index_params(self) -> exp.IndexParameters: 3152 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3153 3154 if self._match(TokenType.L_PAREN, advance=False): 3155 columns = self._parse_wrapped_csv(self._parse_with_operator) 3156 else: 3157 columns = None 3158 3159 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3160 partition_by 
= self._parse_partition_by() 3161 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3162 tablespace = ( 3163 self._parse_var(any_token=True) 3164 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3165 else None 3166 ) 3167 where = self._parse_where() 3168 3169 return self.expression( 3170 exp.IndexParameters, 3171 using=using, 3172 columns=columns, 3173 include=include, 3174 partition_by=partition_by, 3175 where=where, 3176 with_storage=with_storage, 3177 tablespace=tablespace, 3178 ) 3179 3180 def _parse_index( 3181 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3182 ) -> t.Optional[exp.Index]: 3183 if index or anonymous: 3184 unique = None 3185 primary = None 3186 amp = None 3187 3188 self._match(TokenType.ON) 3189 self._match(TokenType.TABLE) # hive 3190 table = self._parse_table_parts(schema=True) 3191 else: 3192 unique = self._match(TokenType.UNIQUE) 3193 primary = self._match_text_seq("PRIMARY") 3194 amp = self._match_text_seq("AMP") 3195 3196 if not self._match(TokenType.INDEX): 3197 return None 3198 3199 index = self._parse_id_var() 3200 table = None 3201 3202 params = self._parse_index_params() 3203 3204 return self.expression( 3205 exp.Index, 3206 this=index, 3207 table=table, 3208 unique=unique, 3209 primary=primary, 3210 amp=amp, 3211 params=params, 3212 ) 3213 3214 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3215 hints: t.List[exp.Expression] = [] 3216 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3217 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3218 hints.append( 3219 self.expression( 3220 exp.WithTableHint, 3221 expressions=self._parse_csv( 3222 lambda: self._parse_function() or self._parse_var(any_token=True) 3223 ), 3224 ) 3225 ) 3226 self._match_r_paren() 3227 else: 3228 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3229 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3230 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3231 3232 self._match_texts(("INDEX", "KEY")) 3233 if self._match(TokenType.FOR): 3234 hint.set("target", self._advance_any() and self._prev.text.upper()) 3235 3236 hint.set("expressions", self._parse_wrapped_id_vars()) 3237 hints.append(hint) 3238 3239 return hints or None 3240 3241 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3242 return ( 3243 (not schema and self._parse_function(optional_parens=False)) 3244 or self._parse_id_var(any_token=False) 3245 or self._parse_string_as_identifier() 3246 or self._parse_placeholder() 3247 ) 3248 3249 def _parse_table_parts( 3250 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3251 ) -> exp.Table: 3252 catalog = None 3253 db = None 3254 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3255 3256 while self._match(TokenType.DOT): 3257 if catalog: 3258 # This allows nesting the table in arbitrarily many dot expressions if needed 3259 table = self.expression( 3260 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3261 ) 3262 else: 3263 catalog = db 3264 db = table 3265 # "" used for tsql FROM a..b case 3266 table = self._parse_table_part(schema=schema) or "" 3267 3268 if ( 3269 wildcard 3270 and self._is_connected() 3271 and (isinstance(table, exp.Identifier) or not table) 3272 and self._match(TokenType.STAR) 3273 ): 3274 if isinstance(table, exp.Identifier): 3275 table.args["this"] += "*" 3276 else: 3277 table = 
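# ------------------------------------------------------------------
# Usage sketch (illustrative; not part of the parser source).
# _parse_table_parts above splits dotted names into catalog/db/table:
#
#     import sqlglot
#     from sqlglot import exp
#
#     tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     assert (tbl.catalog, tbl.db, tbl.name) == ("c", "d", "t")
# ------------------------------------------------------------------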
exp.Identifier(this="*") 3278 3279 # We bubble up comments from the Identifier to the Table 3280 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3281 3282 if is_db_reference: 3283 catalog = db 3284 db = table 3285 table = None 3286 3287 if not table and not is_db_reference: 3288 self.raise_error(f"Expected table name but got {self._curr}") 3289 if not db and is_db_reference: 3290 self.raise_error(f"Expected database name but got {self._curr}") 3291 3292 return self.expression( 3293 exp.Table, 3294 comments=comments, 3295 this=table, 3296 db=db, 3297 catalog=catalog, 3298 pivots=self._parse_pivots(), 3299 ) 3300 3301 def _parse_table( 3302 self, 3303 schema: bool = False, 3304 joins: bool = False, 3305 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3306 parse_bracket: bool = False, 3307 is_db_reference: bool = False, 3308 parse_partition: bool = False, 3309 ) -> t.Optional[exp.Expression]: 3310 lateral = self._parse_lateral() 3311 if lateral: 3312 return lateral 3313 3314 unnest = self._parse_unnest() 3315 if unnest: 3316 return unnest 3317 3318 values = self._parse_derived_table_values() 3319 if values: 3320 return values 3321 3322 subquery = self._parse_select(table=True) 3323 if subquery: 3324 if not subquery.args.get("pivots"): 3325 subquery.set("pivots", self._parse_pivots()) 3326 return subquery 3327 3328 bracket = parse_bracket and self._parse_bracket(None) 3329 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3330 3331 only = self._match(TokenType.ONLY) 3332 3333 this = t.cast( 3334 exp.Expression, 3335 bracket 3336 or self._parse_bracket( 3337 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3338 ), 3339 ) 3340 3341 if only: 3342 this.set("only", only) 3343 3344 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3345 self._match_text_seq("*") 3346 3347 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3348 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3349 this.set("partition", self._parse_partition()) 3350 3351 if schema: 3352 return self._parse_schema(this=this) 3353 3354 version = self._parse_version() 3355 3356 if version: 3357 this.set("version", version) 3358 3359 if self.dialect.ALIAS_POST_TABLESAMPLE: 3360 table_sample = self._parse_table_sample() 3361 3362 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3363 if alias: 3364 this.set("alias", alias) 3365 3366 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3367 return self.expression( 3368 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3369 ) 3370 3371 this.set("hints", self._parse_table_hints()) 3372 3373 if not this.args.get("pivots"): 3374 this.set("pivots", self._parse_pivots()) 3375 3376 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3377 table_sample = self._parse_table_sample() 3378 3379 if table_sample: 3380 table_sample.set("this", this) 3381 this = table_sample 3382 3383 if joins: 3384 for join in self._parse_joins(): 3385 this.append("joins", join) 3386 3387 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3388 this.set("ordinality", True) 3389 this.set("alias", self._parse_table_alias()) 3390 3391 return this 3392 3393 def _parse_version(self) -> t.Optional[exp.Version]: 3394 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3395 this = "TIMESTAMP" 3396 elif self._match(TokenType.VERSION_SNAPSHOT): 3397 this = "VERSION" 3398 else: 3399 return None 3400 3401 
if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3402 kind = self._prev.text.upper() 3403 start = self._parse_bitwise() 3404 self._match_texts(("TO", "AND")) 3405 end = self._parse_bitwise() 3406 expression: t.Optional[exp.Expression] = self.expression( 3407 exp.Tuple, expressions=[start, end] 3408 ) 3409 elif self._match_text_seq("CONTAINED", "IN"): 3410 kind = "CONTAINED IN" 3411 expression = self.expression( 3412 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3413 ) 3414 elif self._match(TokenType.ALL): 3415 kind = "ALL" 3416 expression = None 3417 else: 3418 self._match_text_seq("AS", "OF") 3419 kind = "AS OF" 3420 expression = self._parse_type() 3421 3422 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3423 3424 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3425 if not self._match(TokenType.UNNEST): 3426 return None 3427 3428 expressions = self._parse_wrapped_csv(self._parse_equality) 3429 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3430 3431 alias = self._parse_table_alias() if with_alias else None 3432 3433 if alias: 3434 if self.dialect.UNNEST_COLUMN_ONLY: 3435 if alias.args.get("columns"): 3436 self.raise_error("Unexpected extra column alias in unnest.") 3437 3438 alias.set("columns", [alias.this]) 3439 alias.set("this", None) 3440 3441 columns = alias.args.get("columns") or [] 3442 if offset and len(expressions) < len(columns): 3443 offset = columns.pop() 3444 3445 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3446 self._match(TokenType.ALIAS) 3447 offset = self._parse_id_var( 3448 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3449 ) or exp.to_identifier("offset") 3450 3451 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3452 3453 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3454 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3455 if not is_derived and not self._match_text_seq("VALUES"): 3456 return None 3457 3458 expressions = self._parse_csv(self._parse_value) 3459 alias = self._parse_table_alias() 3460 3461 if is_derived: 3462 self._match_r_paren() 3463 3464 return self.expression( 3465 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3466 ) 3467 3468 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3469 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3470 as_modifier and self._match_text_seq("USING", "SAMPLE") 3471 ): 3472 return None 3473 3474 bucket_numerator = None 3475 bucket_denominator = None 3476 bucket_field = None 3477 percent = None 3478 size = None 3479 seed = None 3480 3481 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3482 matched_l_paren = self._match(TokenType.L_PAREN) 3483 3484 if self.TABLESAMPLE_CSV: 3485 num = None 3486 expressions = self._parse_csv(self._parse_primary) 3487 else: 3488 expressions = None 3489 num = ( 3490 self._parse_factor() 3491 if self._match(TokenType.NUMBER, advance=False) 3492 else self._parse_primary() or self._parse_placeholder() 3493 ) 3494 3495 if self._match_text_seq("BUCKET"): 3496 bucket_numerator = self._parse_number() 3497 self._match_text_seq("OUT", "OF") 3498 bucket_denominator = self._parse_number() 3499 self._match(TokenType.ON) 3500 bucket_field = self._parse_field() 3501 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3502 percent = num 3503 elif self._match(TokenType.ROWS) or 
not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3504 size = num 3505 else: 3506 percent = num 3507 3508 if matched_l_paren: 3509 self._match_r_paren() 3510 3511 if self._match(TokenType.L_PAREN): 3512 method = self._parse_var(upper=True) 3513 seed = self._match(TokenType.COMMA) and self._parse_number() 3514 self._match_r_paren() 3515 elif self._match_texts(("SEED", "REPEATABLE")): 3516 seed = self._parse_wrapped(self._parse_number) 3517 3518 if not method and self.DEFAULT_SAMPLING_METHOD: 3519 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3520 3521 return self.expression( 3522 exp.TableSample, 3523 expressions=expressions, 3524 method=method, 3525 bucket_numerator=bucket_numerator, 3526 bucket_denominator=bucket_denominator, 3527 bucket_field=bucket_field, 3528 percent=percent, 3529 size=size, 3530 seed=seed, 3531 ) 3532 3533 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3534 return list(iter(self._parse_pivot, None)) or None 3535 3536 def _parse_joins(self) -> t.Iterator[exp.Join]: 3537 return iter(self._parse_join, None) 3538 3539 # https://duckdb.org/docs/sql/statements/pivot 3540 def _parse_simplified_pivot(self) -> exp.Pivot: 3541 def _parse_on() -> t.Optional[exp.Expression]: 3542 this = self._parse_bitwise() 3543 return self._parse_in(this) if self._match(TokenType.IN) else this 3544 3545 this = self._parse_table() 3546 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3547 using = self._match(TokenType.USING) and self._parse_csv( 3548 lambda: self._parse_alias(self._parse_function()) 3549 ) 3550 group = self._parse_group() 3551 return self.expression( 3552 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3553 ) 3554 3555 def _parse_pivot_in(self) -> exp.In: 3556 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3557 this = self._parse_conjunction() 3558 3559 self._match(TokenType.ALIAS) 3560 alias = self._parse_field() 3561 if alias: 3562 return self.expression(exp.PivotAlias, this=this, alias=alias) 3563 3564 return this 3565 3566 value = self._parse_column() 3567 3568 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3569 self.raise_error("Expecting IN (") 3570 3571 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3572 3573 self._match_r_paren() 3574 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3575 3576 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3577 index = self._index 3578 include_nulls = None 3579 3580 if self._match(TokenType.PIVOT): 3581 unpivot = False 3582 elif self._match(TokenType.UNPIVOT): 3583 unpivot = True 3584 3585 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3586 if self._match_text_seq("INCLUDE", "NULLS"): 3587 include_nulls = True 3588 elif self._match_text_seq("EXCLUDE", "NULLS"): 3589 include_nulls = False 3590 else: 3591 return None 3592 3593 expressions = [] 3594 3595 if not self._match(TokenType.L_PAREN): 3596 self._retreat(index) 3597 return None 3598 3599 if unpivot: 3600 expressions = self._parse_csv(self._parse_column) 3601 else: 3602 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3603 3604 if not expressions: 3605 self.raise_error("Failed to parse PIVOT's aggregation list") 3606 3607 if not self._match(TokenType.FOR): 3608 self.raise_error("Expecting FOR") 3609 3610 field = self._parse_pivot_in() 3611 3612 self._match_r_paren() 3613 3614 pivot = self.expression( 3615 exp.Pivot, 3616 expressions=expressions, 3617 field=field, 3618 
unpivot=unpivot, 3619 include_nulls=include_nulls, 3620 ) 3621 3622 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3623 pivot.set("alias", self._parse_table_alias()) 3624 3625 if not unpivot: 3626 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3627 3628 columns: t.List[exp.Expression] = [] 3629 for fld in pivot.args["field"].expressions: 3630 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3631 for name in names: 3632 if self.PREFIXED_PIVOT_COLUMNS: 3633 name = f"{name}_{field_name}" if name else field_name 3634 else: 3635 name = f"{field_name}_{name}" if name else field_name 3636 3637 columns.append(exp.to_identifier(name)) 3638 3639 pivot.set("columns", columns) 3640 3641 return pivot 3642 3643 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3644 return [agg.alias for agg in aggregations] 3645 3646 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3647 if not skip_where_token and not self._match(TokenType.PREWHERE): 3648 return None 3649 3650 return self.expression( 3651 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3652 ) 3653 3654 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3655 if not skip_where_token and not self._match(TokenType.WHERE): 3656 return None 3657 3658 return self.expression( 3659 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3660 ) 3661 3662 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3663 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3664 return None 3665 3666 elements: t.Dict[str, t.Any] = defaultdict(list) 3667 3668 if self._match(TokenType.ALL): 3669 elements["all"] = True 3670 elif self._match(TokenType.DISTINCT): 3671 elements["all"] = False 3672 3673 while True: 3674 expressions = self._parse_csv( 3675 lambda: None 3676 if self._match(TokenType.ROLLUP, advance=False) 3677 else self._parse_conjunction() 3678 ) 3679 if expressions: 3680 elements["expressions"].extend(expressions) 3681 3682 grouping_sets = self._parse_grouping_sets() 3683 if grouping_sets: 3684 elements["grouping_sets"].extend(grouping_sets) 3685 3686 rollup = None 3687 cube = None 3688 totals = None 3689 3690 index = self._index 3691 with_ = self._match(TokenType.WITH) 3692 if self._match(TokenType.ROLLUP): 3693 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3694 elements["rollup"].extend(ensure_list(rollup)) 3695 3696 if self._match(TokenType.CUBE): 3697 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3698 elements["cube"].extend(ensure_list(cube)) 3699 3700 if self._match_text_seq("TOTALS"): 3701 totals = True 3702 elements["totals"] = True # type: ignore 3703 3704 if not (grouping_sets or rollup or cube or totals): 3705 if with_: 3706 self._retreat(index) 3707 break 3708 3709 return self.expression(exp.Group, **elements) # type: ignore 3710 3711 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3712 if not self._match(TokenType.GROUPING_SETS): 3713 return None 3714 3715 return self._parse_wrapped_csv(self._parse_grouping_set) 3716 3717 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3718 if self._match(TokenType.L_PAREN): 3719 grouping_set = self._parse_csv(self._parse_column) 3720 self._match_r_paren() 3721 return self.expression(exp.Tuple, expressions=grouping_set) 3722 3723 return self._parse_column() 3724 3725 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3726 if not skip_having_token and not self._match(TokenType.HAVING): 3727 return None 3728 return self.expression(exp.Having, this=self._parse_conjunction()) 3729 3730 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3731 if not self._match(TokenType.QUALIFY): 3732 return None 3733 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3734 3735 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3736 if skip_start_token: 3737 start = None 3738 elif self._match(TokenType.START_WITH): 3739 start = self._parse_conjunction() 3740 else: 3741 return None 3742 3743 self._match(TokenType.CONNECT_BY) 3744 nocycle = self._match_text_seq("NOCYCLE") 3745 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3746 exp.Prior, this=self._parse_bitwise() 3747 ) 3748 connect = self._parse_conjunction() 3749 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3750 3751 if not start and self._match(TokenType.START_WITH): 3752 start = self._parse_conjunction() 3753 3754 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3755 3756 def _parse_name_as_expression(self) -> exp.Alias: 3757 return self.expression( 3758 exp.Alias, 3759 alias=self._parse_id_var(any_token=True), 3760 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3761 ) 3762 3763 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3764 if self._match_text_seq("INTERPOLATE"): 3765 return self._parse_wrapped_csv(self._parse_name_as_expression) 3766 return None 3767 3768 def _parse_order( 3769 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3770 ) -> t.Optional[exp.Expression]: 3771 siblings = None 3772 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3773 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3774 return this 3775 3776 siblings = True 3777 3778 return self.expression( 3779 exp.Order, 3780 this=this, 3781 expressions=self._parse_csv(self._parse_ordered), 3782 interpolate=self._parse_interpolate(), 3783 siblings=siblings, 3784 ) 3785 3786 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3787 if not self._match(token): 3788 return None 3789 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3790 3791 def _parse_ordered( 3792 self, parse_method: t.Optional[t.Callable] = None 3793 ) -> t.Optional[exp.Ordered]: 3794 this = parse_method() if parse_method else self._parse_conjunction() 3795 if not this: 3796 return None 3797 3798 asc = self._match(TokenType.ASC) 3799 desc = self._match(TokenType.DESC) or (asc and False) 3800 3801 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3802 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3803 3804 nulls_first = is_nulls_first or False 3805 explicitly_null_ordered = is_nulls_first or is_nulls_last 3806 3807 if ( 3808 not explicitly_null_ordered 3809 and ( 3810 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3811 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3812 ) 3813 and self.dialect.NULL_ORDERING != "nulls_are_last" 3814 ): 3815 nulls_first = True 3816 3817 if self._match_text_seq("WITH", "FILL"): 3818 with_fill = self.expression( 3819 exp.WithFill, 3820 **{ # type: ignore 3821 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3822 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3823 "step": self._match_text_seq("STEP") and 
self._parse_bitwise(), 3824 }, 3825 ) 3826 else: 3827 with_fill = None 3828 3829 return self.expression( 3830 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3831 ) 3832 3833 def _parse_limit( 3834 self, 3835 this: t.Optional[exp.Expression] = None, 3836 top: bool = False, 3837 skip_limit_token: bool = False, 3838 ) -> t.Optional[exp.Expression]: 3839 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3840 comments = self._prev_comments 3841 if top: 3842 limit_paren = self._match(TokenType.L_PAREN) 3843 expression = self._parse_term() if limit_paren else self._parse_number() 3844 3845 if limit_paren: 3846 self._match_r_paren() 3847 else: 3848 expression = self._parse_term() 3849 3850 if self._match(TokenType.COMMA): 3851 offset = expression 3852 expression = self._parse_term() 3853 else: 3854 offset = None 3855 3856 limit_exp = self.expression( 3857 exp.Limit, 3858 this=this, 3859 expression=expression, 3860 offset=offset, 3861 comments=comments, 3862 expressions=self._parse_limit_by(), 3863 ) 3864 3865 return limit_exp 3866 3867 if self._match(TokenType.FETCH): 3868 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3869 direction = self._prev.text.upper() if direction else "FIRST" 3870 3871 count = self._parse_field(tokens=self.FETCH_TOKENS) 3872 percent = self._match(TokenType.PERCENT) 3873 3874 self._match_set((TokenType.ROW, TokenType.ROWS)) 3875 3876 only = self._match_text_seq("ONLY") 3877 with_ties = self._match_text_seq("WITH", "TIES") 3878 3879 if only and with_ties: 3880 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3881 3882 return self.expression( 3883 exp.Fetch, 3884 direction=direction, 3885 count=count, 3886 percent=percent, 3887 with_ties=with_ties, 3888 ) 3889 3890 return this 3891 3892 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3893 if not self._match(TokenType.OFFSET): 3894 return this 3895 3896 count = self._parse_term() 3897 self._match_set((TokenType.ROW, TokenType.ROWS)) 3898 3899 return self.expression( 3900 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3901 ) 3902 3903 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3904 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3905 3906 def _parse_locks(self) -> t.List[exp.Lock]: 3907 locks = [] 3908 while True: 3909 if self._match_text_seq("FOR", "UPDATE"): 3910 update = True 3911 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3912 "LOCK", "IN", "SHARE", "MODE" 3913 ): 3914 update = False 3915 else: 3916 break 3917 3918 expressions = None 3919 if self._match_text_seq("OF"): 3920 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3921 3922 wait: t.Optional[bool | exp.Expression] = None 3923 if self._match_text_seq("NOWAIT"): 3924 wait = True 3925 elif self._match_text_seq("WAIT"): 3926 wait = self._parse_primary() 3927 elif self._match_text_seq("SKIP", "LOCKED"): 3928 wait = False 3929 3930 locks.append( 3931 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3932 ) 3933 3934 return locks 3935 3936 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3937 while this and self._match_set(self.SET_OPERATIONS): 3938 token_type = self._prev.token_type 3939 3940 if token_type == TokenType.UNION: 3941 operation = exp.Union 3942 elif token_type == TokenType.EXCEPT: 3943 operation = exp.Except 3944 
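# Illustrative usage sketch (not part of the parser source): the set-operator
# token just matched picks the node type here, and a following ALL/DISTINCT
# keyword populates the "distinct" arg. The query below is made up.
import sqlglot
from sqlglot import exp

_union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert isinstance(_union, exp.Union)
assert _union.args.get("distinct") is False  # ALL was matched, so not DISTINCT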
else: 3945 operation = exp.Intersect 3946 3947 comments = self._prev.comments 3948 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3949 by_name = self._match_text_seq("BY", "NAME") 3950 expression = self._parse_select(nested=True, parse_set_operation=False) 3951 3952 this = self.expression( 3953 operation, 3954 comments=comments, 3955 this=this, 3956 distinct=distinct, 3957 by_name=by_name, 3958 expression=expression, 3959 ) 3960 3961 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3962 expression = this.expression 3963 3964 if expression: 3965 for arg in self.UNION_MODIFIERS: 3966 expr = expression.args.get(arg) 3967 if expr: 3968 this.set(arg, expr.pop()) 3969 3970 return this 3971 3972 def _parse_expression(self) -> t.Optional[exp.Expression]: 3973 return self._parse_alias(self._parse_conjunction()) 3974 3975 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3976 this = self._parse_equality() 3977 3978 if self._match(TokenType.COLON_EQ): 3979 this = self.expression( 3980 exp.PropertyEQ, 3981 this=this, 3982 comments=self._prev_comments, 3983 expression=self._parse_conjunction(), 3984 ) 3985 3986 while self._match_set(self.CONJUNCTION): 3987 this = self.expression( 3988 self.CONJUNCTION[self._prev.token_type], 3989 this=this, 3990 comments=self._prev_comments, 3991 expression=self._parse_equality(), 3992 ) 3993 return this 3994 3995 def _parse_equality(self) -> t.Optional[exp.Expression]: 3996 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3997 3998 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3999 return self._parse_tokens(self._parse_range, self.COMPARISON) 4000 4001 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4002 this = this or self._parse_bitwise() 4003 negate = self._match(TokenType.NOT) 4004 4005 if self._match_set(self.RANGE_PARSERS): 4006 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4007 if not expression: 4008 return this 4009 4010 this = expression 4011 elif self._match(TokenType.ISNULL): 4012 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4013 4014 # Postgres supports ISNULL and NOTNULL for conditions. 
4015 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4016 if self._match(TokenType.NOTNULL): 4017 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4018 this = self.expression(exp.Not, this=this) 4019 4020 if negate: 4021 this = self.expression(exp.Not, this=this) 4022 4023 if self._match(TokenType.IS): 4024 this = self._parse_is(this) 4025 4026 return this 4027 4028 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4029 index = self._index - 1 4030 negate = self._match(TokenType.NOT) 4031 4032 if self._match_text_seq("DISTINCT", "FROM"): 4033 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4034 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4035 4036 expression = self._parse_null() or self._parse_boolean() 4037 if not expression: 4038 self._retreat(index) 4039 return None 4040 4041 this = self.expression(exp.Is, this=this, expression=expression) 4042 return self.expression(exp.Not, this=this) if negate else this 4043 4044 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4045 unnest = self._parse_unnest(with_alias=False) 4046 if unnest: 4047 this = self.expression(exp.In, this=this, unnest=unnest) 4048 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4049 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4050 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4051 4052 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4053 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4054 else: 4055 this = self.expression(exp.In, this=this, expressions=expressions) 4056 4057 if matched_l_paren: 4058 self._match_r_paren(this) 4059 elif not self._match(TokenType.R_BRACKET, expression=this): 4060 self.raise_error("Expecting ]") 4061 else: 4062 this = self.expression(exp.In, this=this, field=self._parse_field()) 4063 4064 return this 4065 4066 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4067 low = self._parse_bitwise() 4068 self._match(TokenType.AND) 4069 high = self._parse_bitwise() 4070 return self.expression(exp.Between, this=this, low=low, high=high) 4071 4072 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4073 if not self._match(TokenType.ESCAPE): 4074 return this 4075 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4076 4077 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4078 index = self._index 4079 4080 if not self._match(TokenType.INTERVAL) and match_interval: 4081 return None 4082 4083 if self._match(TokenType.STRING, advance=False): 4084 this = self._parse_primary() 4085 else: 4086 this = self._parse_term() 4087 4088 if not this or ( 4089 isinstance(this, exp.Column) 4090 and not this.table 4091 and not this.this.quoted 4092 and this.name.upper() == "IS" 4093 ): 4094 self._retreat(index) 4095 return None 4096 4097 unit = self._parse_function() or ( 4098 not self._match(TokenType.ALIAS, advance=False) 4099 and self._parse_var(any_token=True, upper=True) 4100 ) 4101 4102 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4103 # each INTERVAL expression into this canonical form so it's easy to transpile 4104 if this and this.is_number: 4105 this = exp.Literal.string(this.name) 4106 elif this and this.is_string: 4107 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4108 if 
len(parts) == 1: 4109 if unit: 4110 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4111 self._retreat(self._index - 1) 4112 4113 this = exp.Literal.string(parts[0][0]) 4114 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4115 4116 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4117 unit = self.expression( 4118 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4119 ) 4120 4121 interval = self.expression(exp.Interval, this=this, unit=unit) 4122 4123 index = self._index 4124 self._match(TokenType.PLUS) 4125 4126 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4127 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4128 return self.expression( 4129 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4130 ) 4131 4132 self._retreat(index) 4133 return interval 4134 4135 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4136 this = self._parse_term() 4137 4138 while True: 4139 if self._match_set(self.BITWISE): 4140 this = self.expression( 4141 self.BITWISE[self._prev.token_type], 4142 this=this, 4143 expression=self._parse_term(), 4144 ) 4145 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4146 this = self.expression( 4147 exp.DPipe, 4148 this=this, 4149 expression=self._parse_term(), 4150 safe=not self.dialect.STRICT_STRING_CONCAT, 4151 ) 4152 elif self._match(TokenType.DQMARK): 4153 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4154 elif self._match_pair(TokenType.LT, TokenType.LT): 4155 this = self.expression( 4156 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4157 ) 4158 elif self._match_pair(TokenType.GT, TokenType.GT): 4159 this = self.expression( 4160 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4161 ) 4162 else: 4163 break 4164 4165 return this 4166 4167 def _parse_term(self) -> t.Optional[exp.Expression]: 4168 return self._parse_tokens(self._parse_factor, self.TERM) 4169 4170 def _parse_factor(self) -> t.Optional[exp.Expression]: 4171 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4172 this = parse_method() 4173 4174 while self._match_set(self.FACTOR): 4175 klass = self.FACTOR[self._prev.token_type] 4176 comments = self._prev_comments 4177 expression = parse_method() 4178 4179 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4180 self._retreat(self._index - 1) 4181 return this 4182 4183 this = self.expression(klass, this=this, comments=comments, expression=expression) 4184 4185 if isinstance(this, exp.Div): 4186 this.args["typed"] = self.dialect.TYPED_DIVISION 4187 this.args["safe"] = self.dialect.SAFE_DIVISION 4188 4189 return this 4190 4191 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4192 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4193 4194 def _parse_unary(self) -> t.Optional[exp.Expression]: 4195 if self._match_set(self.UNARY_PARSERS): 4196 return self.UNARY_PARSERS[self._prev.token_type](self) 4197 return self._parse_at_time_zone(self._parse_type()) 4198 4199 def _parse_type( 4200 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4201 ) -> t.Optional[exp.Expression]: 4202 interval = parse_interval and self._parse_interval() 4203 if interval: 4204 return interval 4205 4206 index = self._index 4207 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4208 4209 if data_type: 4210 index2 = 
self._index 4211 this = self._parse_primary() 4212 4213 if isinstance(this, exp.Literal): 4214 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4215 if parser: 4216 return parser(self, this, data_type) 4217 4218 return self.expression(exp.Cast, this=this, to=data_type) 4219 4220 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4221 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4222 # 4223 # If the index difference here is greater than 1, that means the parser itself must have 4224 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4225 # 4226 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4227 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4228 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4229 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4230 # 4231 # In these cases, we don't really want to return the converted type, but instead retreat 4232 # and try to parse a Column or Identifier in the section below. 4233 if data_type.expressions and index2 - index > 1: 4234 self._retreat(index2) 4235 return self._parse_column_ops(data_type) 4236 4237 self._retreat(index) 4238 4239 if fallback_to_identifier: 4240 return self._parse_id_var() 4241 4242 this = self._parse_column() 4243 return this and self._parse_column_ops(this) 4244 4245 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4246 this = self._parse_type() 4247 if not this: 4248 return None 4249 4250 if isinstance(this, exp.Column) and not this.table: 4251 this = exp.var(this.name.upper()) 4252 4253 return self.expression( 4254 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4255 ) 4256 4257 def _parse_types( 4258 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4259 ) -> t.Optional[exp.Expression]: 4260 index = self._index 4261 4262 this: t.Optional[exp.Expression] = None 4263 prefix = self._match_text_seq("SYSUDTLIB", ".") 4264 4265 if not self._match_set(self.TYPE_TOKENS): 4266 identifier = allow_identifiers and self._parse_id_var( 4267 any_token=False, tokens=(TokenType.VAR,) 4268 ) 4269 if identifier: 4270 tokens = self.dialect.tokenize(identifier.name) 4271 4272 if len(tokens) != 1: 4273 self.raise_error("Unexpected identifier", self._prev) 4274 4275 if tokens[0].token_type in self.TYPE_TOKENS: 4276 self._prev = tokens[0] 4277 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4278 type_name = identifier.name 4279 4280 while self._match(TokenType.DOT): 4281 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4282 4283 this = exp.DataType.build(type_name, udt=True) 4284 else: 4285 self._retreat(self._index - 1) 4286 return None 4287 else: 4288 return None 4289 4290 type_token = self._prev.token_type 4291 4292 if type_token == TokenType.PSEUDO_TYPE: 4293 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4294 4295 if type_token == TokenType.OBJECT_IDENTIFIER: 4296 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4297 4298 nested = type_token in self.NESTED_TYPE_TOKENS 4299 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4300 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4301 expressions = None 4302 maybe_func = False 4303 4304 if self._match(TokenType.L_PAREN): 4305 if is_struct: 4306 expressions = self._parse_csv(lambda: 
self._parse_struct_types(type_required=True)) 4307 elif nested: 4308 expressions = self._parse_csv( 4309 lambda: self._parse_types( 4310 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4311 ) 4312 ) 4313 elif type_token in self.ENUM_TYPE_TOKENS: 4314 expressions = self._parse_csv(self._parse_equality) 4315 elif is_aggregate: 4316 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4317 any_token=False, tokens=(TokenType.VAR,) 4318 ) 4319 if not func_or_ident or not self._match(TokenType.COMMA): 4320 return None 4321 expressions = self._parse_csv( 4322 lambda: self._parse_types( 4323 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4324 ) 4325 ) 4326 expressions.insert(0, func_or_ident) 4327 else: 4328 expressions = self._parse_csv(self._parse_type_size) 4329 4330 if not expressions or not self._match(TokenType.R_PAREN): 4331 self._retreat(index) 4332 return None 4333 4334 maybe_func = True 4335 4336 values: t.Optional[t.List[exp.Expression]] = None 4337 4338 if nested and self._match(TokenType.LT): 4339 if is_struct: 4340 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4341 else: 4342 expressions = self._parse_csv( 4343 lambda: self._parse_types( 4344 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4345 ) 4346 ) 4347 4348 if not self._match(TokenType.GT): 4349 self.raise_error("Expecting >") 4350 4351 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4352 values = self._parse_csv(self._parse_conjunction) 4353 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4354 4355 if type_token in self.TIMESTAMPS: 4356 if self._match_text_seq("WITH", "TIME", "ZONE"): 4357 maybe_func = False 4358 tz_type = ( 4359 exp.DataType.Type.TIMETZ 4360 if type_token in self.TIMES 4361 else exp.DataType.Type.TIMESTAMPTZ 4362 ) 4363 this = exp.DataType(this=tz_type, expressions=expressions) 4364 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4365 maybe_func = False 4366 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4367 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4368 maybe_func = False 4369 elif type_token == TokenType.INTERVAL: 4370 unit = self._parse_var(upper=True) 4371 if unit: 4372 if self._match_text_seq("TO"): 4373 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4374 4375 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4376 else: 4377 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4378 4379 if maybe_func and check_func: 4380 index2 = self._index 4381 peek = self._parse_string() 4382 4383 if not peek: 4384 self._retreat(index) 4385 return None 4386 4387 self._retreat(index2) 4388 4389 if not this: 4390 if self._match_text_seq("UNSIGNED"): 4391 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4392 if not unsigned_type_token: 4393 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4394 4395 type_token = unsigned_type_token or type_token 4396 4397 this = exp.DataType( 4398 this=exp.DataType.Type[type_token.value], 4399 expressions=expressions, 4400 nested=nested, 4401 values=values, 4402 prefix=prefix, 4403 ) 4404 elif expressions: 4405 this.set("expressions", expressions) 4406 4407 index = self._index 4408 4409 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4410 matched_array = self._match(TokenType.ARRAY) 4411 4412 while self._curr: 4413 
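# Illustrative usage sketch (not part of the parser source): exp.DataType.build
# routes type strings through _parse_types, so nested and parameterized types
# can be inspected directly. The type strings below are made up.
from sqlglot import exp

_arr = exp.DataType.build("ARRAY<INT>")
assert _arr.this == exp.DataType.Type.ARRAY
assert _arr.expressions[0].this == exp.DataType.Type.INT

_dec = exp.DataType.build("DECIMAL(38, 0)")
assert len(_dec.expressions) == 2  # precision and scale, as DataTypeParam nodes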
matched_l_bracket = self._match(TokenType.L_BRACKET) 4414 if not matched_l_bracket and not matched_array: 4415 break 4416 4417 matched_array = False 4418 values = self._parse_csv(self._parse_conjunction) or None 4419 if values and not schema: 4420 self._retreat(index) 4421 break 4422 4423 this = exp.DataType( 4424 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4425 ) 4426 self._match(TokenType.R_BRACKET) 4427 4428 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4429 converter = self.TYPE_CONVERTER.get(this.this) 4430 if converter: 4431 this = converter(t.cast(exp.DataType, this)) 4432 4433 return this 4434 4435 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4436 index = self._index 4437 this = ( 4438 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4439 or self._parse_id_var() 4440 ) 4441 self._match(TokenType.COLON) 4442 4443 if ( 4444 type_required 4445 and not isinstance(this, exp.DataType) 4446 and not self._match_set(self.TYPE_TOKENS, advance=False) 4447 ): 4448 self._retreat(index) 4449 return self._parse_types() 4450 4451 return self._parse_column_def(this) 4452 4453 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4454 if not self._match_text_seq("AT", "TIME", "ZONE"): 4455 return this 4456 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4457 4458 def _parse_column(self) -> t.Optional[exp.Expression]: 4459 this = self._parse_column_reference() 4460 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4461 4462 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4463 this = self._parse_field() 4464 if ( 4465 not this 4466 and self._match(TokenType.VALUES, advance=False) 4467 and self.VALUES_FOLLOWED_BY_PAREN 4468 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4469 ): 4470 this = self._parse_id_var() 4471 4472 if isinstance(this, exp.Identifier): 4473 # We bubble up comments from the Identifier to the Column 4474 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4475 4476 return this 4477 4478 def _parse_colon_as_json_extract( 4479 self, this: t.Optional[exp.Expression] 4480 ) -> t.Optional[exp.Expression]: 4481 casts = [] 4482 json_path = [] 4483 4484 while self._match(TokenType.COLON): 4485 start_index = self._index 4486 path = self._parse_column_ops(self._parse_field(any_token=True)) 4487 4488 # The cast :: operator has a lower precedence than the extraction operator :, so 4489 # we rearrange the AST appropriately to avoid casting the JSON path 4490 while isinstance(path, exp.Cast): 4491 casts.append(path.to) 4492 path = path.this 4493 4494 if casts: 4495 dcolon_offset = next( 4496 i 4497 for i, t in enumerate(self._tokens[start_index:]) 4498 if t.token_type == TokenType.DCOLON 4499 ) 4500 end_token = self._tokens[start_index + dcolon_offset - 1] 4501 else: 4502 end_token = self._prev 4503 4504 if path: 4505 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4506 4507 if json_path: 4508 this = self.expression( 4509 exp.JSONExtract, 4510 this=this, 4511 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4512 ) 4513 4514 while casts: 4515 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4516 4517 return this 4518 4519 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4520 this = self._parse_bracket(this) 4521 4522 while 
self._match_set(self.COLUMN_OPERATORS): 4523 op_token = self._prev.token_type 4524 op = self.COLUMN_OPERATORS.get(op_token) 4525 4526 if op_token == TokenType.DCOLON: 4527 field = self._parse_types() 4528 if not field: 4529 self.raise_error("Expected type") 4530 elif op and self._curr: 4531 field = self._parse_column_reference() 4532 else: 4533 field = self._parse_field(any_token=True, anonymous_func=True) 4534 4535 if isinstance(field, exp.Func) and this: 4536 # bigquery allows function calls like x.y.count(...) 4537 # SAFE.SUBSTR(...) 4538 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4539 this = exp.replace_tree( 4540 this, 4541 lambda n: ( 4542 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4543 if n.table 4544 else n.this 4545 ) 4546 if isinstance(n, exp.Column) 4547 else n, 4548 ) 4549 4550 if op: 4551 this = op(self, this, field) 4552 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4553 this = self.expression( 4554 exp.Column, 4555 this=field, 4556 table=this.this, 4557 db=this.args.get("table"), 4558 catalog=this.args.get("db"), 4559 ) 4560 else: 4561 this = self.expression(exp.Dot, this=this, expression=field) 4562 4563 this = self._parse_bracket(this) 4564 4565 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4566 4567 def _parse_primary(self) -> t.Optional[exp.Expression]: 4568 if self._match_set(self.PRIMARY_PARSERS): 4569 token_type = self._prev.token_type 4570 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4571 4572 if token_type == TokenType.STRING: 4573 expressions = [primary] 4574 while self._match(TokenType.STRING): 4575 expressions.append(exp.Literal.string(self._prev.text)) 4576 4577 if len(expressions) > 1: 4578 return self.expression(exp.Concat, expressions=expressions) 4579 4580 return primary 4581 4582 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4583 return exp.Literal.number(f"0.{self._prev.text}") 4584 4585 if self._match(TokenType.L_PAREN): 4586 comments = self._prev_comments 4587 query = self._parse_select() 4588 4589 if query: 4590 expressions = [query] 4591 else: 4592 expressions = self._parse_expressions() 4593 4594 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4595 4596 if not this and self._match(TokenType.R_PAREN, advance=False): 4597 this = self.expression(exp.Tuple) 4598 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4599 this = self._parse_subquery(this=this, parse_alias=False) 4600 elif isinstance(this, exp.Subquery): 4601 this = self._parse_subquery( 4602 this=self._parse_set_operations(this), parse_alias=False 4603 ) 4604 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4605 this = self.expression(exp.Tuple, expressions=expressions) 4606 else: 4607 this = self.expression(exp.Paren, this=this) 4608 4609 if this: 4610 this.add_comments(comments) 4611 4612 self._match_r_paren(expression=this) 4613 return this 4614 4615 return None 4616 4617 def _parse_field( 4618 self, 4619 any_token: bool = False, 4620 tokens: t.Optional[t.Collection[TokenType]] = None, 4621 anonymous_func: bool = False, 4622 ) -> t.Optional[exp.Expression]: 4623 if anonymous_func: 4624 field = ( 4625 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4626 or self._parse_primary() 4627 ) 4628 else: 4629 field = self._parse_primary() or self._parse_function( 4630 anonymous=anonymous_func, any_token=any_token 4631 ) 4632 return field or self._parse_id_var(any_token=any_token, 
tokens=tokens) 4633 4634 def _parse_function( 4635 self, 4636 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4637 anonymous: bool = False, 4638 optional_parens: bool = True, 4639 any_token: bool = False, 4640 ) -> t.Optional[exp.Expression]: 4641 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4642 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4643 fn_syntax = False 4644 if ( 4645 self._match(TokenType.L_BRACE, advance=False) 4646 and self._next 4647 and self._next.text.upper() == "FN" 4648 ): 4649 self._advance(2) 4650 fn_syntax = True 4651 4652 func = self._parse_function_call( 4653 functions=functions, 4654 anonymous=anonymous, 4655 optional_parens=optional_parens, 4656 any_token=any_token, 4657 ) 4658 4659 if fn_syntax: 4660 self._match(TokenType.R_BRACE) 4661 4662 return func 4663 4664 def _parse_function_call( 4665 self, 4666 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4667 anonymous: bool = False, 4668 optional_parens: bool = True, 4669 any_token: bool = False, 4670 ) -> t.Optional[exp.Expression]: 4671 if not self._curr: 4672 return None 4673 4674 comments = self._curr.comments 4675 token_type = self._curr.token_type 4676 this = self._curr.text 4677 upper = this.upper() 4678 4679 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4680 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4681 self._advance() 4682 return self._parse_window(parser(self)) 4683 4684 if not self._next or self._next.token_type != TokenType.L_PAREN: 4685 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4686 self._advance() 4687 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4688 4689 return None 4690 4691 if any_token: 4692 if token_type in self.RESERVED_TOKENS: 4693 return None 4694 elif token_type not in self.FUNC_TOKENS: 4695 return None 4696 4697 self._advance(2) 4698 4699 parser = self.FUNCTION_PARSERS.get(upper) 4700 if parser and not anonymous: 4701 this = parser(self) 4702 else: 4703 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4704 4705 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4706 this = self.expression(subquery_predicate, this=self._parse_select()) 4707 self._match_r_paren() 4708 return this 4709 4710 if functions is None: 4711 functions = self.FUNCTIONS 4712 4713 function = functions.get(upper) 4714 4715 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4716 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4717 4718 if alias: 4719 args = self._kv_to_prop_eq(args) 4720 4721 if function and not anonymous: 4722 if "dialect" in function.__code__.co_varnames: 4723 func = function(args, dialect=self.dialect) 4724 else: 4725 func = function(args) 4726 4727 func = self.validate_expression(func, args) 4728 if not self.dialect.NORMALIZE_FUNCTIONS: 4729 func.meta["name"] = this 4730 4731 this = func 4732 else: 4733 if token_type == TokenType.IDENTIFIER: 4734 this = exp.Identifier(this=this, quoted=True) 4735 this = self.expression(exp.Anonymous, this=this, expressions=args) 4736 4737 if isinstance(this, exp.Expression): 4738 this.add_comments(comments) 4739 4740 self._match_r_paren(this) 4741 return self._parse_window(this) 4742 4743 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4744 transformed = [] 4745 4746 for e in expressions: 4747 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4748 if isinstance(e, exp.Alias): 4749 e = 
self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4750 4751 if not isinstance(e, exp.PropertyEQ): 4752 e = self.expression( 4753 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4754 ) 4755 4756 if isinstance(e.this, exp.Column): 4757 e.this.replace(e.this.this) 4758 4759 transformed.append(e) 4760 4761 return transformed 4762 4763 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4764 return self._parse_column_def(self._parse_id_var()) 4765 4766 def _parse_user_defined_function( 4767 self, kind: t.Optional[TokenType] = None 4768 ) -> t.Optional[exp.Expression]: 4769 this = self._parse_id_var() 4770 4771 while self._match(TokenType.DOT): 4772 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4773 4774 if not self._match(TokenType.L_PAREN): 4775 return this 4776 4777 expressions = self._parse_csv(self._parse_function_parameter) 4778 self._match_r_paren() 4779 return self.expression( 4780 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4781 ) 4782 4783 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4784 literal = self._parse_primary() 4785 if literal: 4786 return self.expression(exp.Introducer, this=token.text, expression=literal) 4787 4788 return self.expression(exp.Identifier, this=token.text) 4789 4790 def _parse_session_parameter(self) -> exp.SessionParameter: 4791 kind = None 4792 this = self._parse_id_var() or self._parse_primary() 4793 4794 if this and self._match(TokenType.DOT): 4795 kind = this.name 4796 this = self._parse_var() or self._parse_primary() 4797 4798 return self.expression(exp.SessionParameter, this=this, kind=kind) 4799 4800 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4801 return self._parse_id_var() 4802 4803 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4804 index = self._index 4805 4806 if self._match(TokenType.L_PAREN): 4807 expressions = t.cast( 4808 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4809 ) 4810 4811 if not self._match(TokenType.R_PAREN): 4812 self._retreat(index) 4813 else: 4814 expressions = [self._parse_lambda_arg()] 4815 4816 if self._match_set(self.LAMBDAS): 4817 return self.LAMBDAS[self._prev.token_type](self, expressions) 4818 4819 self._retreat(index) 4820 4821 this: t.Optional[exp.Expression] 4822 4823 if self._match(TokenType.DISTINCT): 4824 this = self.expression( 4825 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4826 ) 4827 else: 4828 this = self._parse_select_or_expression(alias=alias) 4829 4830 return self._parse_limit( 4831 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4832 ) 4833 4834 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4835 index = self._index 4836 if not self._match(TokenType.L_PAREN): 4837 return this 4838 4839 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4840 # expr can be of both types 4841 if self._match_set(self.SELECT_START_TOKENS): 4842 self._retreat(index) 4843 return this 4844 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4845 self._match_r_paren() 4846 return self.expression(exp.Schema, this=this, expressions=args) 4847 4848 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4849 return self._parse_column_def(self._parse_field(any_token=True)) 4850 4851 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4852 # column defs are not really columns, they're identifiers 4853 if isinstance(this, exp.Column): 4854 this = this.this 4855 4856 kind = self._parse_types(schema=True) 4857 4858 if self._match_text_seq("FOR", "ORDINALITY"): 4859 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4860 4861 constraints: t.List[exp.Expression] = [] 4862 4863 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4864 ("ALIAS", "MATERIALIZED") 4865 ): 4866 persisted = self._prev.text.upper() == "MATERIALIZED" 4867 constraints.append( 4868 self.expression( 4869 exp.ComputedColumnConstraint, 4870 this=self._parse_conjunction(), 4871 persisted=persisted or self._match_text_seq("PERSISTED"), 4872 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4873 ) 4874 ) 4875 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4876 self._match(TokenType.ALIAS) 4877 constraints.append( 4878 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4879 ) 4880 4881 while True: 4882 constraint = self._parse_column_constraint() 4883 if not constraint: 4884 break 4885 constraints.append(constraint) 4886 4887 if not kind and not constraints: 4888 return this 4889 4890 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4891 4892 def _parse_auto_increment( 4893 self, 4894 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4895 start = None 4896 increment = None 4897 4898 if self._match(TokenType.L_PAREN, advance=False): 4899 args = self._parse_wrapped_csv(self._parse_bitwise) 4900 start = seq_get(args, 0) 4901 increment = seq_get(args, 1) 4902 elif self._match_text_seq("START"): 4903 start = self._parse_bitwise() 4904 self._match_text_seq("INCREMENT") 4905 increment = self._parse_bitwise() 4906 4907 if start and increment: 4908 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4909 4910 return exp.AutoIncrementColumnConstraint() 4911 4912 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4913 if not self._match_text_seq("REFRESH"): 4914 self._retreat(self._index - 1) 4915 return None 4916 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4917 4918 def _parse_compress(self) -> exp.CompressColumnConstraint: 4919 if self._match(TokenType.L_PAREN, advance=False): 4920 return self.expression( 4921 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4922 ) 4923 4924 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4925 4926 def _parse_generated_as_identity( 4927 self, 4928 ) -> ( 4929 exp.GeneratedAsIdentityColumnConstraint 4930 | exp.ComputedColumnConstraint 4931 | exp.GeneratedAsRowColumnConstraint 4932 ): 4933 if self._match_text_seq("BY", "DEFAULT"): 4934 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4935 this = self.expression( 4936 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4937 ) 4938 else: 4939 self._match_text_seq("ALWAYS") 4940 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4941 4942 self._match(TokenType.ALIAS) 4943 4944 if self._match_text_seq("ROW"): 4945 start = self._match_text_seq("START") 4946 if not start: 4947 self._match(TokenType.END) 4948 hidden = self._match_text_seq("HIDDEN") 4949 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4950 4951 identity = self._match_text_seq("IDENTITY") 4952 4953 if self._match(TokenType.L_PAREN): 4954 if self._match(TokenType.START_WITH): 4955 this.set("start", self._parse_bitwise()) 4956 if self._match_text_seq("INCREMENT", "BY"): 4957 this.set("increment", self._parse_bitwise()) 4958 if self._match_text_seq("MINVALUE"): 4959 this.set("minvalue", self._parse_bitwise()) 4960 if self._match_text_seq("MAXVALUE"): 4961 this.set("maxvalue", self._parse_bitwise()) 4962 4963 if self._match_text_seq("CYCLE"): 4964 this.set("cycle", True) 4965 elif self._match_text_seq("NO", "CYCLE"): 4966 this.set("cycle", False) 4967 4968 if not identity: 4969 this.set("expression", self._parse_range()) 4970 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4971 args = self._parse_csv(self._parse_bitwise) 4972 this.set("start", seq_get(args, 0)) 4973 this.set("increment", seq_get(args, 1)) 4974 4975 self._match_r_paren() 4976 4977 return this 4978 4979 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4980 self._match_text_seq("LENGTH") 4981 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4982 4983 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4984 if self._match_text_seq("NULL"): 4985 return self.expression(exp.NotNullColumnConstraint) 4986 if self._match_text_seq("CASESPECIFIC"): 4987 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4988 if self._match_text_seq("FOR", "REPLICATION"): 4989 return self.expression(exp.NotForReplicationColumnConstraint) 4990 return None 4991 4992 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4993 if self._match(TokenType.CONSTRAINT): 4994 this = self._parse_id_var() 4995 else: 4996 this = None 4997 4998 if self._match_texts(self.CONSTRAINT_PARSERS): 4999 return self.expression( 5000 exp.ColumnConstraint, 5001 this=this, 5002 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5003 ) 5004 5005 return this 5006 5007 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5008 if not self._match(TokenType.CONSTRAINT): 5009 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5010 5011 return self.expression( 5012 exp.Constraint, 5013 this=self._parse_id_var(), 5014 expressions=self._parse_unnamed_constraints(), 5015 ) 5016 5017 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5018 constraints = [] 5019 while True: 5020 constraint = self._parse_unnamed_constraint() or self._parse_function() 5021 if not constraint: 5022 break 5023 constraints.append(constraint) 5024 5025 return constraints 5026 5027 def _parse_unnamed_constraint( 5028 self, constraints: t.Optional[t.Collection[str]] = None 5029 ) -> t.Optional[exp.Expression]: 5030 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5031 constraints or self.CONSTRAINT_PARSERS 5032 ): 5033 return None 5034 5035 constraint = self._prev.text.upper() 5036 if constraint not in self.CONSTRAINT_PARSERS: 5037 
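# Illustrative usage sketch (not part of the parser source): constraint keywords
# are dispatched through CONSTRAINT_PARSERS, e.g. GENERATED ... AS IDENTITY is
# handled by _parse_generated_as_identity above. The DDL below is made up.
import sqlglot
from sqlglot import exp

_ddl = sqlglot.parse_one(
    "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 5))"
)
_identity = _ddl.find(exp.GeneratedAsIdentityColumnConstraint)
assert _identity.this is True               # ALWAYS, as opposed to BY DEFAULT
assert _identity.args["start"].this == "1"  # START WITH 1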
self.raise_error(f"No parser found for schema constraint {constraint}.") 5038 5039 return self.CONSTRAINT_PARSERS[constraint](self) 5040 5041 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5042 self._match_text_seq("KEY") 5043 return self.expression( 5044 exp.UniqueColumnConstraint, 5045 this=self._parse_schema(self._parse_id_var(any_token=False)), 5046 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5047 on_conflict=self._parse_on_conflict(), 5048 ) 5049 5050 def _parse_key_constraint_options(self) -> t.List[str]: 5051 options = [] 5052 while True: 5053 if not self._curr: 5054 break 5055 5056 if self._match(TokenType.ON): 5057 action = None 5058 on = self._advance_any() and self._prev.text 5059 5060 if self._match_text_seq("NO", "ACTION"): 5061 action = "NO ACTION" 5062 elif self._match_text_seq("CASCADE"): 5063 action = "CASCADE" 5064 elif self._match_text_seq("RESTRICT"): 5065 action = "RESTRICT" 5066 elif self._match_pair(TokenType.SET, TokenType.NULL): 5067 action = "SET NULL" 5068 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5069 action = "SET DEFAULT" 5070 else: 5071 self.raise_error("Invalid key constraint") 5072 5073 options.append(f"ON {on} {action}") 5074 elif self._match_text_seq("NOT", "ENFORCED"): 5075 options.append("NOT ENFORCED") 5076 elif self._match_text_seq("DEFERRABLE"): 5077 options.append("DEFERRABLE") 5078 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5079 options.append("INITIALLY DEFERRED") 5080 elif self._match_text_seq("NORELY"): 5081 options.append("NORELY") 5082 elif self._match_text_seq("MATCH", "FULL"): 5083 options.append("MATCH FULL") 5084 else: 5085 break 5086 5087 return options 5088 5089 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5090 if match and not self._match(TokenType.REFERENCES): 5091 return None 5092 5093 expressions = None 5094 this = self._parse_table(schema=True) 5095 options = self._parse_key_constraint_options() 5096 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5097 5098 def _parse_foreign_key(self) -> exp.ForeignKey: 5099 expressions = self._parse_wrapped_id_vars() 5100 reference = self._parse_references() 5101 options = {} 5102 5103 while self._match(TokenType.ON): 5104 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5105 self.raise_error("Expected DELETE or UPDATE") 5106 5107 kind = self._prev.text.lower() 5108 5109 if self._match_text_seq("NO", "ACTION"): 5110 action = "NO ACTION" 5111 elif self._match(TokenType.SET): 5112 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5113 action = "SET " + self._prev.text.upper() 5114 else: 5115 self._advance() 5116 action = self._prev.text.upper() 5117 5118 options[kind] = action 5119 5120 return self.expression( 5121 exp.ForeignKey, 5122 expressions=expressions, 5123 reference=reference, 5124 **options, # type: ignore 5125 ) 5126 5127 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5128 return self._parse_field() 5129 5130 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5131 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5132 self._retreat(self._index - 1) 5133 return None 5134 5135 id_vars = self._parse_wrapped_id_vars() 5136 return self.expression( 5137 exp.PeriodForSystemTimeConstraint, 5138 this=seq_get(id_vars, 0), 5139 expression=seq_get(id_vars, 1), 5140 ) 5141 5142 def _parse_primary_key( 5143 self, wrapped_optional: bool = False, in_props: bool = False 5144 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5145 desc = ( 5146 self._match_set((TokenType.ASC, TokenType.DESC)) 5147 and self._prev.token_type == TokenType.DESC 5148 ) 5149 5150 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5151 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5152 5153 expressions = self._parse_wrapped_csv( 5154 self._parse_primary_key_part, optional=wrapped_optional 5155 ) 5156 options = self._parse_key_constraint_options() 5157 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5158 5159 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5160 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5161 5162 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5163 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5164 return this 5165 5166 bracket_kind = self._prev.token_type 5167 expressions = self._parse_csv( 5168 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5169 ) 5170 5171 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5172 self.raise_error("Expected ]") 5173 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5174 self.raise_error("Expected }") 5175 5176 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5177 if bracket_kind == TokenType.L_BRACE: 5178 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5179 elif not this or this.name.upper() == "ARRAY": 5180 this = self.expression(exp.Array, expressions=expressions) 5181 else: 5182 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5183 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5184 5185 self._add_comments(this) 5186 return self._parse_bracket(this) 5187 5188 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5189 if self._match(TokenType.COLON): 5190 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5191 return this 5192 5193 def _parse_case(self) -> t.Optional[exp.Expression]: 5194 ifs = [] 5195 default = None 5196 5197 comments = self._prev_comments 5198 expression = self._parse_conjunction() 5199 5200 while self._match(TokenType.WHEN): 5201 this = self._parse_conjunction() 5202 self._match(TokenType.THEN) 5203 then = self._parse_conjunction() 5204 ifs.append(self.expression(exp.If, this=this, true=then)) 5205 5206 if self._match(TokenType.ELSE): 5207 default = self._parse_conjunction() 5208 5209 if not self._match(TokenType.END): 5210 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5211 default = exp.column("interval") 5212 else: 5213 self.raise_error("Expected END after CASE", self._prev) 5214 5215 return self.expression( 5216 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5217 ) 5218 5219 def _parse_if(self) -> t.Optional[exp.Expression]: 5220 if self._match(TokenType.L_PAREN): 5221 args = self._parse_csv(self._parse_conjunction) 5222 this = self.validate_expression(exp.If.from_arg_list(args), args) 5223 self._match_r_paren() 5224 else: 5225 index = self._index - 1 5226 5227 if self.NO_PAREN_IF_COMMANDS and index == 0: 5228 return self._parse_as_command(self._prev) 5229 5230 condition = self._parse_conjunction() 5231 5232 if not condition: 5233 self._retreat(index) 5234 return None 5235 
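# Illustrative usage sketch (not part of the parser source): _parse_case above
# turns each WHEN/THEN pair into an exp.If inside exp.Case, with the ELSE branch
# stored under "default". The query below is made up.
import sqlglot
from sqlglot import exp

_case = sqlglot.parse_one("SELECT CASE WHEN x > 0 THEN 'pos' ELSE 'neg' END").find(exp.Case)
assert len(_case.args["ifs"]) == 1          # one WHEN/THEN pair -> one exp.If
assert _case.args["default"].this == "neg"  # the ELSE branch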
5236 self._match(TokenType.THEN) 5237 true = self._parse_conjunction() 5238 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5239 self._match(TokenType.END) 5240 this = self.expression(exp.If, this=condition, true=true, false=false) 5241 5242 return this 5243 5244 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5245 if not self._match_text_seq("VALUE", "FOR"): 5246 self._retreat(self._index - 1) 5247 return None 5248 5249 return self.expression( 5250 exp.NextValueFor, 5251 this=self._parse_column(), 5252 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5253 ) 5254 5255 def _parse_extract(self) -> exp.Extract: 5256 this = self._parse_function() or self._parse_var() or self._parse_type() 5257 5258 if self._match(TokenType.FROM): 5259 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5260 5261 if not self._match(TokenType.COMMA): 5262 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5263 5264 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5265 5266 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5267 this = self._parse_conjunction() 5268 5269 if not self._match(TokenType.ALIAS): 5270 if self._match(TokenType.COMMA): 5271 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5272 5273 self.raise_error("Expected AS after CAST") 5274 5275 fmt = None 5276 to = self._parse_types() 5277 5278 if self._match(TokenType.FORMAT): 5279 fmt_string = self._parse_string() 5280 fmt = self._parse_at_time_zone(fmt_string) 5281 5282 if not to: 5283 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5284 if to.this in exp.DataType.TEMPORAL_TYPES: 5285 this = self.expression( 5286 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5287 this=this, 5288 format=exp.Literal.string( 5289 format_time( 5290 fmt_string.this if fmt_string else "", 5291 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5292 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5293 ) 5294 ), 5295 ) 5296 5297 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5298 this.set("zone", fmt.args["zone"]) 5299 return this 5300 elif not to: 5301 self.raise_error("Expected TYPE after CAST") 5302 elif isinstance(to, exp.Identifier): 5303 to = exp.DataType.build(to.name, udt=True) 5304 elif to.this == exp.DataType.Type.CHAR: 5305 if self._match(TokenType.CHARACTER_SET): 5306 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5307 5308 return self.expression( 5309 exp.Cast if strict else exp.TryCast, 5310 this=this, 5311 to=to, 5312 format=fmt, 5313 safe=safe, 5314 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5315 ) 5316 5317 def _parse_string_agg(self) -> exp.Expression: 5318 if self._match(TokenType.DISTINCT): 5319 args: t.List[t.Optional[exp.Expression]] = [ 5320 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5321 ] 5322 if self._match(TokenType.COMMA): 5323 args.extend(self._parse_csv(self._parse_conjunction)) 5324 else: 5325 args = self._parse_csv(self._parse_conjunction) # type: ignore 5326 5327 index = self._index 5328 if not self._match(TokenType.R_PAREN) and args: 5329 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5330 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5331 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5332 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5333 5334 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5335 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5336 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5337 if not self._match_text_seq("WITHIN", "GROUP"): 5338 self._retreat(index) 5339 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5340 5341 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5342 order = self._parse_order(this=seq_get(args, 0)) 5343 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5344 5345 def _parse_convert( 5346 self, strict: bool, safe: t.Optional[bool] = None 5347 ) -> t.Optional[exp.Expression]: 5348 this = self._parse_bitwise() 5349 5350 if self._match(TokenType.USING): 5351 to: t.Optional[exp.Expression] = self.expression( 5352 exp.CharacterSet, this=self._parse_var() 5353 ) 5354 elif self._match(TokenType.COMMA): 5355 to = self._parse_types() 5356 else: 5357 to = None 5358 5359 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5360 5361 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5362 """ 5363 There are generally two variants of the DECODE function: 5364 5365 - DECODE(bin, charset) 5366 - DECODE(expression, search, result [, search, result] ... [, default]) 5367 5368 The second variant will always be parsed into a CASE expression. Note that NULL 5369 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5370 instead of relying on pattern matching. 
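        For example (illustrative of the resulting shape, not an exact repr),
        DECODE(x, 1, 'one', NULL, 'missing', 'other') is parsed as if it were
        CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END.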
5371 """ 5372 args = self._parse_csv(self._parse_conjunction) 5373 5374 if len(args) < 3: 5375 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5376 5377 expression, *expressions = args 5378 if not expression: 5379 return None 5380 5381 ifs = [] 5382 for search, result in zip(expressions[::2], expressions[1::2]): 5383 if not search or not result: 5384 return None 5385 5386 if isinstance(search, exp.Literal): 5387 ifs.append( 5388 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5389 ) 5390 elif isinstance(search, exp.Null): 5391 ifs.append( 5392 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5393 ) 5394 else: 5395 cond = exp.or_( 5396 exp.EQ(this=expression.copy(), expression=search), 5397 exp.and_( 5398 exp.Is(this=expression.copy(), expression=exp.Null()), 5399 exp.Is(this=search.copy(), expression=exp.Null()), 5400 copy=False, 5401 ), 5402 copy=False, 5403 ) 5404 ifs.append(exp.If(this=cond, true=result)) 5405 5406 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5407 5408 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5409 self._match_text_seq("KEY") 5410 key = self._parse_column() 5411 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5412 self._match_text_seq("VALUE") 5413 value = self._parse_bitwise() 5414 5415 if not key and not value: 5416 return None 5417 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5418 5419 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5420 if not this or not self._match_text_seq("FORMAT", "JSON"): 5421 return this 5422 5423 return self.expression(exp.FormatJson, this=this) 5424 5425 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5426 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5427 for value in values: 5428 if self._match_text_seq(value, "ON", on): 5429 return f"{value} ON {on}" 5430 5431 return None 5432 5433 @t.overload 5434 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5435 5436 @t.overload 5437 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5438 5439 def _parse_json_object(self, agg=False): 5440 star = self._parse_star() 5441 expressions = ( 5442 [star] 5443 if star 5444 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5445 ) 5446 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5447 5448 unique_keys = None 5449 if self._match_text_seq("WITH", "UNIQUE"): 5450 unique_keys = True 5451 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5452 unique_keys = False 5453 5454 self._match_text_seq("KEYS") 5455 5456 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5457 self._parse_type() 5458 ) 5459 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5460 5461 return self.expression( 5462 exp.JSONObjectAgg if agg else exp.JSONObject, 5463 expressions=expressions, 5464 null_handling=null_handling, 5465 unique_keys=unique_keys, 5466 return_type=return_type, 5467 encoding=encoding, 5468 ) 5469 5470 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5471 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5472 if not self._match_text_seq("NESTED"): 5473 this = self._parse_id_var() 5474 kind = self._parse_types(allow_identifiers=False) 5475 nested = None 5476 else: 5477 this = None 5478 kind = None 5479 nested = True 5480 5481 path = self._match_text_seq("PATH") and self._parse_string() 5482 nested_schema = nested and self._parse_json_schema() 5483 5484 return self.expression( 5485 exp.JSONColumnDef, 5486 this=this, 5487 kind=kind, 5488 path=path, 5489 nested_schema=nested_schema, 5490 ) 5491 5492 def _parse_json_schema(self) -> exp.JSONSchema: 5493 self._match_text_seq("COLUMNS") 5494 return self.expression( 5495 exp.JSONSchema, 5496 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5497 ) 5498 5499 def _parse_json_table(self) -> exp.JSONTable: 5500 this = self._parse_format_json(self._parse_bitwise()) 5501 path = self._match(TokenType.COMMA) and self._parse_string() 5502 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5503 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5504 schema = self._parse_json_schema() 5505 5506 return exp.JSONTable( 5507 this=this, 5508 schema=schema, 5509 path=path, 5510 error_handling=error_handling, 5511 empty_handling=empty_handling, 5512 ) 5513 5514 def _parse_match_against(self) -> exp.MatchAgainst: 5515 expressions = self._parse_csv(self._parse_column) 5516 5517 self._match_text_seq(")", "AGAINST", "(") 5518 5519 this = self._parse_string() 5520 5521 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5522 modifier = "IN NATURAL LANGUAGE MODE" 5523 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5524 modifier = f"{modifier} WITH QUERY EXPANSION" 5525 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5526 modifier = "IN BOOLEAN MODE" 5527 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5528 modifier = "WITH QUERY EXPANSION" 5529 else: 5530 modifier = None 5531 5532 return self.expression( 5533 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5534 ) 5535 5536 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5537 def _parse_open_json(self) -> exp.OpenJSON: 5538 this = self._parse_bitwise() 5539 path = self._match(TokenType.COMMA) and self._parse_string() 5540 5541 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5542 this = self._parse_field(any_token=True) 5543 kind = self._parse_types() 5544 path = 
self._parse_string() 5545 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5546 5547 return self.expression( 5548 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5549 ) 5550 5551 expressions = None 5552 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5553 self._match_l_paren() 5554 expressions = self._parse_csv(_parse_open_json_column_def) 5555 5556 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5557 5558 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5559 args = self._parse_csv(self._parse_bitwise) 5560 5561 if self._match(TokenType.IN): 5562 return self.expression( 5563 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5564 ) 5565 5566 if haystack_first: 5567 haystack = seq_get(args, 0) 5568 needle = seq_get(args, 1) 5569 else: 5570 needle = seq_get(args, 0) 5571 haystack = seq_get(args, 1) 5572 5573 return self.expression( 5574 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5575 ) 5576 5577 def _parse_predict(self) -> exp.Predict: 5578 self._match_text_seq("MODEL") 5579 this = self._parse_table() 5580 5581 self._match(TokenType.COMMA) 5582 self._match_text_seq("TABLE") 5583 5584 return self.expression( 5585 exp.Predict, 5586 this=this, 5587 expression=self._parse_table(), 5588 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5589 ) 5590 5591 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5592 args = self._parse_csv(self._parse_table) 5593 return exp.JoinHint(this=func_name.upper(), expressions=args) 5594 5595 def _parse_substring(self) -> exp.Substring: 5596 # Postgres supports the form: substring(string [from int] [for int]) 5597 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5598 5599 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5600 5601 if self._match(TokenType.FROM): 5602 args.append(self._parse_bitwise()) 5603 if self._match(TokenType.FOR): 5604 if len(args) == 1: 5605 args.append(exp.Literal.number(1)) 5606 args.append(self._parse_bitwise()) 5607 5608 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5609 5610 def _parse_trim(self) -> exp.Trim: 5611 # https://www.w3resource.com/sql/character-functions/trim.php 5612 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5613 5614 position = None 5615 collation = None 5616 expression = None 5617 5618 if self._match_texts(self.TRIM_TYPES): 5619 position = self._prev.text.upper() 5620 5621 this = self._parse_bitwise() 5622 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5623 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5624 expression = self._parse_bitwise() 5625 5626 if invert_order: 5627 this, expression = expression, this 5628 5629 if self._match(TokenType.COLLATE): 5630 collation = self._parse_bitwise() 5631 5632 return self.expression( 5633 exp.Trim, this=this, position=position, expression=expression, collation=collation 5634 ) 5635 5636 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5637 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5638 5639 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5640 return self._parse_window(self._parse_id_var(), alias=True) 5641 5642 def _parse_respect_or_ignore_nulls( 5643 self, this: t.Optional[exp.Expression] 5644 ) -> t.Optional[exp.Expression]: 5645 if self._match_text_seq("IGNORE", "NULLS"): 
5646 return self.expression(exp.IgnoreNulls, this=this) 5647 if self._match_text_seq("RESPECT", "NULLS"): 5648 return self.expression(exp.RespectNulls, this=this) 5649 return this 5650 5651 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5652 if self._match(TokenType.HAVING): 5653 self._match_texts(("MAX", "MIN")) 5654 max = self._prev.text.upper() != "MIN" 5655 return self.expression( 5656 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5657 ) 5658 5659 return this 5660 5661 def _parse_window( 5662 self, this: t.Optional[exp.Expression], alias: bool = False 5663 ) -> t.Optional[exp.Expression]: 5664 func = this 5665 comments = func.comments if isinstance(func, exp.Expression) else None 5666 5667 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5668 self._match(TokenType.WHERE) 5669 this = self.expression( 5670 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5671 ) 5672 self._match_r_paren() 5673 5674 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5675 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5676 if self._match_text_seq("WITHIN", "GROUP"): 5677 order = self._parse_wrapped(self._parse_order) 5678 this = self.expression(exp.WithinGroup, this=this, expression=order) 5679 5680 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] clause before OVER. 5681 # Some dialects choose to implement it and some do not. 5682 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5683 5684 # There is some code above in _parse_lambda that handles 5685 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5686 5687 # The code below handles 5688 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5689 5690 # Oracle allows both formats 5691 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5692 # and Snowflake chose to do the same for familiarity 5693 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5694 if isinstance(this, exp.AggFunc): 5695 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5696 5697 if ignore_respect and ignore_respect is not this: 5698 ignore_respect.replace(ignore_respect.this) 5699 this = self.expression(ignore_respect.__class__, this=this) 5700 5701 this = self._parse_respect_or_ignore_nulls(this) 5702 5703 # BigQuery named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
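# For example (illustrative), both Oracle/Snowflake-style spellings
# FIRST_VALUE(t.col IGNORE NULLS) OVER (ORDER BY t.id) and
# FIRST_VALUE(t.col) IGNORE NULLS OVER (ORDER BY t.id) normalize to the same
# exp.IgnoreNulls-wrapped aggregate, and a named window such as
# SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y) flows through the
# alias=True path below via _parse_named_window.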
5704 if alias: 5705 over = None 5706 self._match(TokenType.ALIAS) 5707 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5708 return this 5709 else: 5710 over = self._prev.text.upper() 5711 5712 if comments and isinstance(func, exp.Expression): 5713 func.pop_comments() 5714 5715 if not self._match(TokenType.L_PAREN): 5716 return self.expression( 5717 exp.Window, 5718 comments=comments, 5719 this=this, 5720 alias=self._parse_id_var(False), 5721 over=over, 5722 ) 5723 5724 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5725 5726 first = self._match(TokenType.FIRST) 5727 if self._match_text_seq("LAST"): 5728 first = False 5729 5730 partition, order = self._parse_partition_and_order() 5731 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5732 5733 if kind: 5734 self._match(TokenType.BETWEEN) 5735 start = self._parse_window_spec() 5736 self._match(TokenType.AND) 5737 end = self._parse_window_spec() 5738 5739 spec = self.expression( 5740 exp.WindowSpec, 5741 kind=kind, 5742 start=start["value"], 5743 start_side=start["side"], 5744 end=end["value"], 5745 end_side=end["side"], 5746 ) 5747 else: 5748 spec = None 5749 5750 self._match_r_paren() 5751 5752 window = self.expression( 5753 exp.Window, 5754 comments=comments, 5755 this=this, 5756 partition_by=partition, 5757 order=order, 5758 spec=spec, 5759 alias=window_alias, 5760 over=over, 5761 first=first, 5762 ) 5763 5764 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5765 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5766 return self._parse_window(window, alias=alias) 5767 5768 return window 5769 5770 def _parse_partition_and_order( 5771 self, 5772 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5773 return self._parse_partition_by(), self._parse_order() 5774 5775 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5776 self._match(TokenType.BETWEEN) 5777 5778 return { 5779 "value": ( 5780 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5781 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5782 or self._parse_bitwise() 5783 ), 5784 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5785 } 5786 5787 def _parse_alias( 5788 self, this: t.Optional[exp.Expression], explicit: bool = False 5789 ) -> t.Optional[exp.Expression]: 5790 any_token = self._match(TokenType.ALIAS) 5791 comments = self._prev_comments or [] 5792 5793 if explicit and not any_token: 5794 return this 5795 5796 if self._match(TokenType.L_PAREN): 5797 aliases = self.expression( 5798 exp.Aliases, 5799 comments=comments, 5800 this=this, 5801 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5802 ) 5803 self._match_r_paren(aliases) 5804 return aliases 5805 5806 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5807 self.STRING_ALIASES and self._parse_string_as_identifier() 5808 ) 5809 5810 if alias: 5811 comments.extend(alias.pop_comments()) 5812 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5813 column = this.this 5814 5815 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5816 if not this.comments and column and column.comments: 5817 this.comments = column.pop_comments() 5818 5819 return this 5820 5821 def _parse_id_var( 5822 self, 5823 any_token: bool = True, 5824 tokens: t.Optional[t.Collection[TokenType]] = None, 5825 ) -> t.Optional[exp.Expression]: 5826 expression = self._parse_identifier() 5827 if 
not expression and ( 5828 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5829 ): 5830 quoted = self._prev.token_type == TokenType.STRING 5831 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5832 5833 return expression 5834 5835 def _parse_string(self) -> t.Optional[exp.Expression]: 5836 if self._match_set(self.STRING_PARSERS): 5837 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5838 return self._parse_placeholder() 5839 5840 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5841 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5842 5843 def _parse_number(self) -> t.Optional[exp.Expression]: 5844 if self._match_set(self.NUMERIC_PARSERS): 5845 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5846 return self._parse_placeholder() 5847 5848 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5849 if self._match(TokenType.IDENTIFIER): 5850 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5851 return self._parse_placeholder() 5852 5853 def _parse_var( 5854 self, 5855 any_token: bool = False, 5856 tokens: t.Optional[t.Collection[TokenType]] = None, 5857 upper: bool = False, 5858 ) -> t.Optional[exp.Expression]: 5859 if ( 5860 (any_token and self._advance_any()) 5861 or self._match(TokenType.VAR) 5862 or (self._match_set(tokens) if tokens else False) 5863 ): 5864 return self.expression( 5865 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5866 ) 5867 return self._parse_placeholder() 5868 5869 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5870 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5871 self._advance() 5872 return self._prev 5873 return None 5874 5875 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5876 return self._parse_var() or self._parse_string() 5877 5878 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5879 return self._parse_primary() or self._parse_var(any_token=True) 5880 5881 def _parse_null(self) -> t.Optional[exp.Expression]: 5882 if self._match_set(self.NULL_TOKENS): 5883 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5884 return self._parse_placeholder() 5885 5886 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5887 if self._match(TokenType.TRUE): 5888 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5889 if self._match(TokenType.FALSE): 5890 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5891 return self._parse_placeholder() 5892 5893 def _parse_star(self) -> t.Optional[exp.Expression]: 5894 if self._match(TokenType.STAR): 5895 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5896 return self._parse_placeholder() 5897 5898 def _parse_parameter(self) -> exp.Parameter: 5899 this = self._parse_identifier() or self._parse_primary_or_var() 5900 return self.expression(exp.Parameter, this=this) 5901 5902 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5903 if self._match_set(self.PLACEHOLDER_PARSERS): 5904 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5905 if placeholder: 5906 return placeholder 5907 self._advance(-1) 5908 return None 5909 5910 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5911 if not self._match_texts(keywords): 5912 return None 5913 if self._match(TokenType.L_PAREN, advance=False): 5914 return 
self._parse_wrapped_csv(self._parse_expression) 5915 5916 expression = self._parse_expression() 5917 return [expression] if expression else None 5918 5919 def _parse_csv( 5920 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5921 ) -> t.List[exp.Expression]: 5922 parse_result = parse_method() 5923 items = [parse_result] if parse_result is not None else [] 5924 5925 while self._match(sep): 5926 self._add_comments(parse_result) 5927 parse_result = parse_method() 5928 if parse_result is not None: 5929 items.append(parse_result) 5930 5931 return items 5932 5933 def _parse_tokens( 5934 self, parse_method: t.Callable, expressions: t.Dict 5935 ) -> t.Optional[exp.Expression]: 5936 this = parse_method() 5937 5938 while self._match_set(expressions): 5939 this = self.expression( 5940 expressions[self._prev.token_type], 5941 this=this, 5942 comments=self._prev_comments, 5943 expression=parse_method(), 5944 ) 5945 5946 return this 5947 5948 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5949 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5950 5951 def _parse_wrapped_csv( 5952 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5953 ) -> t.List[exp.Expression]: 5954 return self._parse_wrapped( 5955 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5956 ) 5957 5958 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5959 wrapped = self._match(TokenType.L_PAREN) 5960 if not wrapped and not optional: 5961 self.raise_error("Expecting (") 5962 parse_result = parse_method() 5963 if wrapped: 5964 self._match_r_paren() 5965 return parse_result 5966 5967 def _parse_expressions(self) -> t.List[exp.Expression]: 5968 return self._parse_csv(self._parse_expression) 5969 5970 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5971 return self._parse_select() or self._parse_set_operations( 5972 self._parse_expression() if alias else self._parse_conjunction() 5973 ) 5974 5975 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5976 return self._parse_query_modifiers( 5977 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5978 ) 5979 5980 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5981 this = None 5982 if self._match_texts(self.TRANSACTION_KIND): 5983 this = self._prev.text 5984 5985 self._match_texts(("TRANSACTION", "WORK")) 5986 5987 modes = [] 5988 while True: 5989 mode = [] 5990 while self._match(TokenType.VAR): 5991 mode.append(self._prev.text) 5992 5993 if mode: 5994 modes.append(" ".join(mode)) 5995 if not self._match(TokenType.COMMA): 5996 break 5997 5998 return self.expression(exp.Transaction, this=this, modes=modes) 5999 6000 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6001 chain = None 6002 savepoint = None 6003 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6004 6005 self._match_texts(("TRANSACTION", "WORK")) 6006 6007 if self._match_text_seq("TO"): 6008 self._match_text_seq("SAVEPOINT") 6009 savepoint = self._parse_id_var() 6010 6011 if self._match(TokenType.AND): 6012 chain = not self._match_text_seq("NO") 6013 self._match_text_seq("CHAIN") 6014 6015 if is_rollback: 6016 return self.expression(exp.Rollback, savepoint=savepoint) 6017 6018 return self.expression(exp.Commit, chain=chain) 6019 6020 def _parse_refresh(self) -> exp.Refresh: 6021 self._match(TokenType.TABLE) 6022 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6023 6024 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6025 if not self._match_text_seq("ADD"): 6026 return None 6027 6028 self._match(TokenType.COLUMN) 6029 exists_column = self._parse_exists(not_=True) 6030 expression = self._parse_field_def() 6031 6032 if expression: 6033 expression.set("exists", exists_column) 6034 6035 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6036 if self._match_texts(("FIRST", "AFTER")): 6037 position = self._prev.text 6038 column_position = self.expression( 6039 exp.ColumnPosition, this=self._parse_column(), position=position 6040 ) 6041 expression.set("position", column_position) 6042 6043 return expression 6044 6045 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6046 drop = self._match(TokenType.DROP) and self._parse_drop() 6047 if drop and not isinstance(drop, exp.Command): 6048 drop.set("kind", drop.args.get("kind", "COLUMN")) 6049 return drop 6050 6051 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6052 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6053 return self.expression( 6054 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6055 ) 6056 6057 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6058 index = self._index - 1 6059 6060 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6061 return self._parse_csv( 6062 lambda: self.expression( 6063 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6064 ) 6065 ) 6066 6067 self._retreat(index) 6068 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6069 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6070 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6071 6072 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6073 if self._match_texts(self.ALTER_ALTER_PARSERS): 6074 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6075 6076 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6077 # keyword after ALTER we default to parsing this statement 6078 self._match(TokenType.COLUMN) 6079 column = self._parse_field(any_token=True) 6080 6081 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6082 return self.expression(exp.AlterColumn, this=column, drop=True) 6083 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6084 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6085 if self._match(TokenType.COMMENT): 6086 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6087 if self._match_text_seq("DROP", "NOT", "NULL"): 6088 return self.expression( 6089 exp.AlterColumn, 6090 this=column, 6091 drop=True, 6092 allow_null=True, 6093 ) 6094 if self._match_text_seq("SET", "NOT", "NULL"): 6095 return self.expression( 6096 exp.AlterColumn, 6097 this=column, 6098 allow_null=False, 6099 ) 6100 self._match_text_seq("SET", "DATA") 6101 self._match_text_seq("TYPE") 6102 return self.expression( 6103 exp.AlterColumn, 6104 this=column, 6105 dtype=self._parse_types(), 6106 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6107 using=self._match(TokenType.USING) and self._parse_conjunction(), 6108 ) 6109 6110 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6111 if self._match_texts(("ALL", "EVEN", "AUTO")): 6112 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6113 6114 self._match_text_seq("KEY", "DISTKEY") 6115 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6116 6117 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6118 if compound: 6119 self._match_text_seq("SORTKEY") 6120 6121 if self._match(TokenType.L_PAREN, advance=False): 6122 return self.expression( 6123 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6124 ) 6125 6126 self._match_texts(("AUTO", "NONE")) 6127 return self.expression( 6128 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6129 ) 6130 6131 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6132 index = self._index - 1 6133 6134 partition_exists = self._parse_exists() 6135 if self._match(TokenType.PARTITION, advance=False): 6136 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6137 6138 self._retreat(index) 6139 return self._parse_csv(self._parse_drop_column) 6140 6141 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6142 if self._match(TokenType.COLUMN): 6143 exists = self._parse_exists() 6144 old_column = self._parse_column() 6145 to = self._match_text_seq("TO") 6146 new_column = self._parse_column() 6147 6148 if old_column is None or to is None or new_column is None: 6149 return None 6150 6151 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6152 6153 self._match_text_seq("TO") 6154 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6155 6156 def _parse_alter_table_set(self) -> exp.AlterSet: 6157 alter_set = self.expression(exp.AlterSet) 6158 6159 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6160 "TABLE", "PROPERTIES" 6161 ): 6162 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6163 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6164 alter_set.set("expressions", [self._parse_conjunction()]) 6165 elif self._match_texts(("LOGGED", "UNLOGGED")): 6166 alter_set.set("option", exp.var(self._prev.text.upper())) 6167 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6168 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6169 elif self._match_text_seq("LOCATION"): 6170 alter_set.set("location", self._parse_field()) 6171 elif self._match_text_seq("ACCESS", "METHOD"): 6172 alter_set.set("access_method", self._parse_field()) 6173 elif self._match_text_seq("TABLESPACE"): 6174 alter_set.set("tablespace", self._parse_field()) 6175 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6176 alter_set.set("file_format", [self._parse_field()]) 6177 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6178 alter_set.set("file_format", self._parse_wrapped_options()) 6179 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6180 alter_set.set("copy_options", self._parse_wrapped_options()) 6181 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6182 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6183 else: 6184 if self._match_text_seq("SERDE"): 6185 alter_set.set("serde", self._parse_field()) 6186 6187 alter_set.set("expressions", [self._parse_properties()]) 6188 6189 return alter_set 6190 6191 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6192 start = self._prev 6193 6194 if not self._match(TokenType.TABLE): 6195 return 
self._parse_as_command(start) 6196 6197 exists = self._parse_exists() 6198 only = self._match_text_seq("ONLY") 6199 this = self._parse_table(schema=True) 6200 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6201 6202 if self._next: 6203 self._advance() 6204 6205 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6206 if parser: 6207 actions = ensure_list(parser(self)) 6208 options = self._parse_csv(self._parse_property) 6209 6210 if not self._curr and actions: 6211 return self.expression( 6212 exp.AlterTable, 6213 this=this, 6214 exists=exists, 6215 actions=actions, 6216 only=only, 6217 options=options, 6218 cluster=cluster, 6219 ) 6220 6221 return self._parse_as_command(start) 6222 6223 def _parse_merge(self) -> exp.Merge: 6224 self._match(TokenType.INTO) 6225 target = self._parse_table() 6226 6227 if target and self._match(TokenType.ALIAS, advance=False): 6228 target.set("alias", self._parse_table_alias()) 6229 6230 self._match(TokenType.USING) 6231 using = self._parse_table() 6232 6233 self._match(TokenType.ON) 6234 on = self._parse_conjunction() 6235 6236 return self.expression( 6237 exp.Merge, 6238 this=target, 6239 using=using, 6240 on=on, 6241 expressions=self._parse_when_matched(), 6242 ) 6243 6244 def _parse_when_matched(self) -> t.List[exp.When]: 6245 whens = [] 6246 6247 while self._match(TokenType.WHEN): 6248 matched = not self._match(TokenType.NOT) 6249 self._match_text_seq("MATCHED") 6250 source = ( 6251 False 6252 if self._match_text_seq("BY", "TARGET") 6253 else self._match_text_seq("BY", "SOURCE") 6254 ) 6255 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6256 6257 self._match(TokenType.THEN) 6258 6259 if self._match(TokenType.INSERT): 6260 _this = self._parse_star() 6261 if _this: 6262 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6263 else: 6264 then = self.expression( 6265 exp.Insert, 6266 this=self._parse_value(), 6267 expression=self._match_text_seq("VALUES") and self._parse_value(), 6268 ) 6269 elif self._match(TokenType.UPDATE): 6270 expressions = self._parse_star() 6271 if expressions: 6272 then = self.expression(exp.Update, expressions=expressions) 6273 else: 6274 then = self.expression( 6275 exp.Update, 6276 expressions=self._match(TokenType.SET) 6277 and self._parse_csv(self._parse_equality), 6278 ) 6279 elif self._match(TokenType.DELETE): 6280 then = self.expression(exp.Var, this=self._prev.text) 6281 else: 6282 then = None 6283 6284 whens.append( 6285 self.expression( 6286 exp.When, 6287 matched=matched, 6288 source=source, 6289 condition=condition, 6290 then=then, 6291 ) 6292 ) 6293 return whens 6294 6295 def _parse_show(self) -> t.Optional[exp.Expression]: 6296 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6297 if parser: 6298 return parser(self) 6299 return self._parse_as_command(self._prev) 6300 6301 def _parse_set_item_assignment( 6302 self, kind: t.Optional[str] = None 6303 ) -> t.Optional[exp.Expression]: 6304 index = self._index 6305 6306 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6307 return self._parse_set_transaction(global_=kind == "GLOBAL") 6308 6309 left = self._parse_primary() or self._parse_column() 6310 assignment_delimiter = self._match_texts(("=", "TO")) 6311 6312 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6313 self._retreat(index) 6314 return None 6315 6316 right = self._parse_statement() or self._parse_id_var() 6317 if isinstance(right, 
(exp.Column, exp.Identifier)): 6318 right = exp.var(right.name) 6319 6320 this = self.expression(exp.EQ, this=left, expression=right) 6321 return self.expression(exp.SetItem, this=this, kind=kind) 6322 6323 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6324 self._match_text_seq("TRANSACTION") 6325 characteristics = self._parse_csv( 6326 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6327 ) 6328 return self.expression( 6329 exp.SetItem, 6330 expressions=characteristics, 6331 kind="TRANSACTION", 6332 **{"global": global_}, # type: ignore 6333 ) 6334 6335 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6336 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6337 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6338 6339 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6340 index = self._index 6341 set_ = self.expression( 6342 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6343 ) 6344 6345 if self._curr: 6346 self._retreat(index) 6347 return self._parse_as_command(self._prev) 6348 6349 return set_ 6350 6351 def _parse_var_from_options( 6352 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6353 ) -> t.Optional[exp.Var]: 6354 start = self._curr 6355 if not start: 6356 return None 6357 6358 option = start.text.upper() 6359 continuations = options.get(option) 6360 6361 index = self._index 6362 self._advance() 6363 for keywords in continuations or []: 6364 if isinstance(keywords, str): 6365 keywords = (keywords,) 6366 6367 if self._match_text_seq(*keywords): 6368 option = f"{option} {' '.join(keywords)}" 6369 break 6370 else: 6371 if continuations or continuations is None: 6372 if raise_unmatched: 6373 self.raise_error(f"Unknown option {option}") 6374 6375 self._retreat(index) 6376 return None 6377 6378 return exp.var(option) 6379 6380 def _parse_as_command(self, start: Token) -> exp.Command: 6381 while self._curr: 6382 self._advance() 6383 text = self._find_sql(start, self._prev) 6384 size = len(start.text) 6385 self._warn_unsupported() 6386 return exp.Command(this=text[:size], expression=text[size:]) 6387 6388 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6389 settings = [] 6390 6391 self._match_l_paren() 6392 kind = self._parse_id_var() 6393 6394 if self._match(TokenType.L_PAREN): 6395 while True: 6396 key = self._parse_id_var() 6397 value = self._parse_primary() 6398 6399 if not key and value is None: 6400 break 6401 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6402 self._match(TokenType.R_PAREN) 6403 6404 self._match_r_paren() 6405 6406 return self.expression( 6407 exp.DictProperty, 6408 this=this, 6409 kind=kind.this if kind else None, 6410 settings=settings, 6411 ) 6412 6413 def _parse_dict_range(self, this: str) -> exp.DictRange: 6414 self._match_l_paren() 6415 has_min = self._match_text_seq("MIN") 6416 if has_min: 6417 min = self._parse_var() or self._parse_primary() 6418 self._match_text_seq("MAX") 6419 max = self._parse_var() or self._parse_primary() 6420 else: 6421 max = self._parse_var() or self._parse_primary() 6422 min = exp.Literal.number(0) 6423 self._match_r_paren() 6424 return self.expression(exp.DictRange, this=this, min=min, max=max) 6425 6426 def _parse_comprehension( 6427 self, this: t.Optional[exp.Expression] 6428 ) -> t.Optional[exp.Comprehension]: 6429 index = self._index 6430 expression = self._parse_column() 6431 if not 
self._match(TokenType.IN): 6432 self._retreat(index - 1) 6433 return None 6434 iterator = self._parse_column() 6435 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6436 return self.expression( 6437 exp.Comprehension, 6438 this=this, 6439 expression=expression, 6440 iterator=iterator, 6441 condition=condition, 6442 ) 6443 6444 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6445 if self._match(TokenType.HEREDOC_STRING): 6446 return self.expression(exp.Heredoc, this=self._prev.text) 6447 6448 if not self._match_text_seq("$"): 6449 return None 6450 6451 tags = ["$"] 6452 tag_text = None 6453 6454 if self._is_connected(): 6455 self._advance() 6456 tags.append(self._prev.text.upper()) 6457 else: 6458 self.raise_error("No closing $ found") 6459 6460 if tags[-1] != "$": 6461 if self._is_connected() and self._match_text_seq("$"): 6462 tag_text = tags[-1] 6463 tags.append("$") 6464 else: 6465 self.raise_error("No closing $ found") 6466 6467 heredoc_start = self._curr 6468 6469 while self._curr: 6470 if self._match_text_seq(*tags, advance=False): 6471 this = self._find_sql(heredoc_start, self._prev) 6472 self._advance(len(tags)) 6473 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6474 6475 self._advance() 6476 6477 self.raise_error(f"No closing {''.join(tags)} found") 6478 return None 6479 6480 def _find_parser( 6481 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6482 ) -> t.Optional[t.Callable]: 6483 if not self._curr: 6484 return None 6485 6486 index = self._index 6487 this = [] 6488 while True: 6489 # The current token might be multiple words 6490 curr = self._curr.text.upper() 6491 key = curr.split(" ") 6492 this.append(curr) 6493 6494 self._advance() 6495 result, trie = in_trie(trie, key) 6496 if result == TrieResult.FAILED: 6497 break 6498 6499 if result == TrieResult.EXISTS: 6500 subparser = parsers[" ".join(this)] 6501 return subparser 6502 6503 self._retreat(index) 6504 return None 6505 6506 def _match(self, token_type, advance=True, expression=None): 6507 if not self._curr: 6508 return None 6509 6510 if self._curr.token_type == token_type: 6511 if advance: 6512 self._advance() 6513 self._add_comments(expression) 6514 return True 6515 6516 return None 6517 6518 def _match_set(self, types, advance=True): 6519 if not self._curr: 6520 return None 6521 6522 if self._curr.token_type in types: 6523 if advance: 6524 self._advance() 6525 return True 6526 6527 return None 6528 6529 def _match_pair(self, token_type_a, token_type_b, advance=True): 6530 if not self._curr or not self._next: 6531 return None 6532 6533 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6534 if advance: 6535 self._advance(2) 6536 return True 6537 6538 return None 6539 6540 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6541 if not self._match(TokenType.L_PAREN, expression=expression): 6542 self.raise_error("Expecting (") 6543 6544 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6545 if not self._match(TokenType.R_PAREN, expression=expression): 6546 self.raise_error("Expecting )") 6547 6548 def _match_texts(self, texts, advance=True): 6549 if self._curr and self._curr.text.upper() in texts: 6550 if advance: 6551 self._advance() 6552 return True 6553 return None 6554 6555 def _match_text_seq(self, *texts, advance=True): 6556 index = self._index 6557 for text in texts: 6558 if self._curr and self._curr.text.upper() == text: 6559 self._advance() 6560 else: 6561 
self._retreat(index) 6562 return None 6563 6564 if not advance: 6565 self._retreat(index) 6566 6567 return True 6568 6569 def _replace_lambda( 6570 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6571 ) -> t.Optional[exp.Expression]: 6572 if not node: 6573 return node 6574 6575 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6576 6577 for column in node.find_all(exp.Column): 6578 typ = lambda_types.get(column.parts[0].name) 6579 if typ is not None: 6580 dot_or_id = column.to_dot() if column.table else column.this 6581 6582 if typ: 6583 dot_or_id = self.expression( 6584 exp.Cast, 6585 this=dot_or_id, 6586 to=typ, 6587 ) 6588 6589 parent = column.parent 6590 6591 while isinstance(parent, exp.Dot): 6592 if not isinstance(parent.parent, exp.Dot): 6593 parent.replace(dot_or_id) 6594 break 6595 parent = parent.parent 6596 else: 6597 if column is node: 6598 node = dot_or_id 6599 else: 6600 column.replace(dot_or_id) 6601 return node 6602 6603 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6604 start = self._prev 6605 6606 # Not to be confused with TRUNCATE(number, decimals) function call 6607 if self._match(TokenType.L_PAREN): 6608 self._retreat(self._index - 2) 6609 return self._parse_function() 6610 6611 # Clickhouse supports TRUNCATE DATABASE as well 6612 is_database = self._match(TokenType.DATABASE) 6613 6614 self._match(TokenType.TABLE) 6615 6616 exists = self._parse_exists(not_=False) 6617 6618 expressions = self._parse_csv( 6619 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6620 ) 6621 6622 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6623 6624 if self._match_text_seq("RESTART", "IDENTITY"): 6625 identity = "RESTART" 6626 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6627 identity = "CONTINUE" 6628 else: 6629 identity = None 6630 6631 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6632 option = self._prev.text 6633 else: 6634 option = None 6635 6636 partition = self._parse_partition() 6637 6638 # Fallback case 6639 if self._curr: 6640 return self._parse_as_command(start) 6641 6642 return self.expression( 6643 exp.TruncateTable, 6644 expressions=expressions, 6645 is_database=is_database, 6646 exists=exists, 6647 cluster=cluster, 6648 identity=identity, 6649 option=option, 6650 partition=partition, 6651 ) 6652 6653 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6654 this = self._parse_ordered(self._parse_opclass) 6655 6656 if not self._match(TokenType.WITH): 6657 return this 6658 6659 op = self._parse_var(any_token=True) 6660 6661 return self.expression(exp.WithOperator, this=this, op=op) 6662 6663 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6664 self._match(TokenType.EQ) 6665 self._match(TokenType.L_PAREN) 6666 6667 opts: t.List[t.Optional[exp.Expression]] = [] 6668 while self._curr and not self._match(TokenType.R_PAREN): 6669 if self._match_text_seq("FORMAT_NAME", "="): 6670 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6671 # so we parse it separately to use _parse_field() 6672 prop = self.expression( 6673 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6674 ) 6675 opts.append(prop) 6676 else: 6677 opts.append(self._parse_property()) 6678 6679 self._match(TokenType.COMMA) 6680 6681 return opts 6682 6683 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6684 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6685 6686 options = [] 6687 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6688 option = self._parse_var(any_token=True) 6689 prev = self._prev.text.upper() 6690 6691 # Different dialects might separate options and values by white space, "=" and "AS" 6692 self._match(TokenType.EQ) 6693 self._match(TokenType.ALIAS) 6694 6695 param = self.expression(exp.CopyParameter, this=option) 6696 6697 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6698 TokenType.L_PAREN, advance=False 6699 ): 6700 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6701 param.set("expressions", self._parse_wrapped_options()) 6702 elif prev == "FILE_FORMAT": 6703 # T-SQL's external file format case 6704 param.set("expression", self._parse_field()) 6705 else: 6706 param.set("expression", self._parse_unquoted_field()) 6707 6708 options.append(param) 6709 self._match(sep) 6710 6711 return options 6712 6713 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6714 expr = self.expression(exp.Credentials) 6715 6716 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6717 expr.set("storage", self._parse_field()) 6718 if self._match_text_seq("CREDENTIALS"): 6719 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6720 creds = ( 6721 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6722 ) 6723 expr.set("credentials", creds) 6724 if self._match_text_seq("ENCRYPTION"): 6725 expr.set("encryption", self._parse_wrapped_options()) 6726 if self._match_text_seq("IAM_ROLE"): 6727 expr.set("iam_role", self._parse_field()) 6728 if self._match_text_seq("REGION"): 6729 expr.set("region", self._parse_field()) 6730 6731 return expr 6732 6733 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6734 return self._parse_field() 6735 6736 def _parse_copy(self) -> exp.Copy | exp.Command: 6737 start = self._prev 6738 6739 self._match(TokenType.INTO) 6740 6741 this = ( 6742 self._parse_select(nested=True, parse_subquery_alias=False) 6743 if self._match(TokenType.L_PAREN, advance=False) 6744 else self._parse_table(schema=True) 6745 ) 6746 6747 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6748 6749 files = self._parse_csv(self._parse_file_location) 6750 credentials = self._parse_credentials() 6751 6752 self._match_text_seq("WITH") 6753 6754 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6755 6756 # Fallback case 6757 if self._curr: 6758 return self._parse_as_command(start) 6759 6760 return self.expression( 6761 exp.Copy, 6762 this=this, 6763 kind=kind, 6764 credentials=credentials, 6765 files=files, 6766 params=params, 6767 )
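# Usage sketch (illustrative; not part of the parser itself). The Parser is
# typically driven through sqlglot's public helpers rather than instantiated
# directly, e.g.:
#
#     import sqlglot
#     from sqlglot import exp
#
#     # Tokenize and parse one statement into a syntax tree.
#     tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")
#
#     # DECODE with search/result pairs is parsed into a CASE expression
#     # (see _parse_decode above).
#     assert tree.find(exp.Case) is not None
#
#     # Re-render the tree for another dialect.
#     print(tree.sql(dialect="duckdb"))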
391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION = { 509 TokenType.AND: exp.And, 510 TokenType.OR: exp.Or, 511 } 512 513 EQUALITY = { 514 TokenType.EQ: exp.EQ, 515 TokenType.NEQ: exp.NEQ, 516 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 517 } 518 519 COMPARISON = { 520 TokenType.GT: exp.GT, 521 TokenType.GTE: exp.GTE, 522 TokenType.LT: exp.LT, 523 TokenType.LTE: exp.LTE, 524 } 525 526 BITWISE = { 527 TokenType.AMP: exp.BitwiseAnd, 528 TokenType.CARET: exp.BitwiseXor, 529 TokenType.PIPE: exp.BitwiseOr, 530 } 531 532 TERM = { 533 TokenType.DASH: exp.Sub, 534 TokenType.PLUS: exp.Add, 535 TokenType.MOD: exp.Mod, 536 TokenType.COLLATE: exp.Collate, 537 } 538 539 FACTOR = { 540 TokenType.DIV: exp.IntDiv, 541 TokenType.LR_ARROW: exp.Distance, 542 TokenType.SLASH: exp.Div, 543 TokenType.STAR: exp.Mul, 544 } 545 546 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 547 548 TIMES = { 549 TokenType.TIME, 550 TokenType.TIMETZ, 551 } 552 553 TIMESTAMPS = { 554 TokenType.TIMESTAMP, 555 
TokenType.TIMESTAMPTZ, 556 TokenType.TIMESTAMPLTZ, 557 *TIMES, 558 } 559 560 SET_OPERATIONS = { 561 TokenType.UNION, 562 TokenType.INTERSECT, 563 TokenType.EXCEPT, 564 } 565 566 JOIN_METHODS = { 567 TokenType.ASOF, 568 TokenType.NATURAL, 569 TokenType.POSITIONAL, 570 } 571 572 JOIN_SIDES = { 573 TokenType.LEFT, 574 TokenType.RIGHT, 575 TokenType.FULL, 576 } 577 578 JOIN_KINDS = { 579 TokenType.INNER, 580 TokenType.OUTER, 581 TokenType.CROSS, 582 TokenType.SEMI, 583 TokenType.ANTI, 584 } 585 586 JOIN_HINTS: t.Set[str] = set() 587 588 LAMBDAS = { 589 TokenType.ARROW: lambda self, expressions: self.expression( 590 exp.Lambda, 591 this=self._replace_lambda( 592 self._parse_conjunction(), 593 expressions, 594 ), 595 expressions=expressions, 596 ), 597 TokenType.FARROW: lambda self, expressions: self.expression( 598 exp.Kwarg, 599 this=exp.var(expressions[0].name), 600 expression=self._parse_conjunction(), 601 ), 602 } 603 604 COLUMN_OPERATORS = { 605 TokenType.DOT: None, 606 TokenType.DCOLON: lambda self, this, to: self.expression( 607 exp.Cast if self.STRICT_CAST else exp.TryCast, 608 this=this, 609 to=to, 610 ), 611 TokenType.ARROW: lambda self, this, path: self.expression( 612 exp.JSONExtract, 613 this=this, 614 expression=self.dialect.to_json_path(path), 615 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 616 ), 617 TokenType.DARROW: lambda self, this, path: self.expression( 618 exp.JSONExtractScalar, 619 this=this, 620 expression=self.dialect.to_json_path(path), 621 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 622 ), 623 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 624 exp.JSONBExtract, 625 this=this, 626 expression=path, 627 ), 628 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 629 exp.JSONBExtractScalar, 630 this=this, 631 expression=path, 632 ), 633 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 634 exp.JSONBContains, 635 this=this, 636 expression=key, 637 ), 638 } 639 640 EXPRESSION_PARSERS = { 641 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 642 exp.Column: lambda self: self._parse_column(), 643 exp.Condition: lambda self: self._parse_conjunction(), 644 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 645 exp.Expression: lambda self: self._parse_expression(), 646 exp.From: lambda self: self._parse_from(joins=True), 647 exp.Group: lambda self: self._parse_group(), 648 exp.Having: lambda self: self._parse_having(), 649 exp.Identifier: lambda self: self._parse_id_var(), 650 exp.Join: lambda self: self._parse_join(), 651 exp.Lambda: lambda self: self._parse_lambda(), 652 exp.Lateral: lambda self: self._parse_lateral(), 653 exp.Limit: lambda self: self._parse_limit(), 654 exp.Offset: lambda self: self._parse_offset(), 655 exp.Order: lambda self: self._parse_order(), 656 exp.Ordered: lambda self: self._parse_ordered(), 657 exp.Properties: lambda self: self._parse_properties(), 658 exp.Qualify: lambda self: self._parse_qualify(), 659 exp.Returning: lambda self: self._parse_returning(), 660 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 661 exp.Table: lambda self: self._parse_table_parts(), 662 exp.TableAlias: lambda self: self._parse_table_alias(), 663 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 664 exp.Where: lambda self: self._parse_where(), 665 exp.Window: lambda self: self._parse_named_window(), 666 exp.With: lambda self: self._parse_with(), 667 "JOIN_TYPE": lambda self: self._parse_join_parts(), 668 } 669 670 STATEMENT_PARSERS 
= { 671 TokenType.ALTER: lambda self: self._parse_alter(), 672 TokenType.BEGIN: lambda self: self._parse_transaction(), 673 TokenType.CACHE: lambda self: self._parse_cache(), 674 TokenType.COMMENT: lambda self: self._parse_comment(), 675 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 676 TokenType.COPY: lambda self: self._parse_copy(), 677 TokenType.CREATE: lambda self: self._parse_create(), 678 TokenType.DELETE: lambda self: self._parse_delete(), 679 TokenType.DESC: lambda self: self._parse_describe(), 680 TokenType.DESCRIBE: lambda self: self._parse_describe(), 681 TokenType.DROP: lambda self: self._parse_drop(), 682 TokenType.INSERT: lambda self: self._parse_insert(), 683 TokenType.KILL: lambda self: self._parse_kill(), 684 TokenType.LOAD: lambda self: self._parse_load(), 685 TokenType.MERGE: lambda self: self._parse_merge(), 686 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 687 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 688 TokenType.REFRESH: lambda self: self._parse_refresh(), 689 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 690 TokenType.SET: lambda self: self._parse_set(), 691 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 692 TokenType.UNCACHE: lambda self: self._parse_uncache(), 693 TokenType.UPDATE: lambda self: self._parse_update(), 694 TokenType.USE: lambda self: self.expression( 695 exp.Use, 696 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 697 this=self._parse_table(schema=False), 698 ), 699 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 700 } 701 702 UNARY_PARSERS = { 703 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 704 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 705 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 706 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 707 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 708 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 709 } 710 711 STRING_PARSERS = { 712 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 713 exp.RawString, this=token.text 714 ), 715 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 716 exp.National, this=token.text 717 ), 718 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 719 TokenType.STRING: lambda self, token: self.expression( 720 exp.Literal, this=token.text, is_string=True 721 ), 722 TokenType.UNICODE_STRING: lambda self, token: self.expression( 723 exp.UnicodeString, 724 this=token.text, 725 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 726 ), 727 } 728 729 NUMERIC_PARSERS = { 730 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 731 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 732 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 733 TokenType.NUMBER: lambda self, token: self.expression( 734 exp.Literal, this=token.text, is_string=False 735 ), 736 } 737 738 PRIMARY_PARSERS = { 739 **STRING_PARSERS, 740 **NUMERIC_PARSERS, 741 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 742 TokenType.NULL: lambda self, _: self.expression(exp.Null), 743 TokenType.TRUE: lambda self, _: 
self.expression(exp.Boolean, this=True), 744 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 745 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 746 TokenType.STAR: lambda self, _: self.expression( 747 exp.Star, 748 **{ 749 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 750 "replace": self._parse_star_op("REPLACE"), 751 "rename": self._parse_star_op("RENAME"), 752 }, 753 ), 754 } 755 756 PLACEHOLDER_PARSERS = { 757 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 758 TokenType.PARAMETER: lambda self: self._parse_parameter(), 759 TokenType.COLON: lambda self: ( 760 self.expression(exp.Placeholder, this=self._prev.text) 761 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 762 else None 763 ), 764 } 765 766 RANGE_PARSERS = { 767 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 768 TokenType.GLOB: binary_range_parser(exp.Glob), 769 TokenType.ILIKE: binary_range_parser(exp.ILike), 770 TokenType.IN: lambda self, this: self._parse_in(this), 771 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 772 TokenType.IS: lambda self, this: self._parse_is(this), 773 TokenType.LIKE: binary_range_parser(exp.Like), 774 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 775 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 776 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 777 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 778 } 779 780 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 781 "ALLOWED_VALUES": lambda self: self.expression( 782 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 783 ), 784 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 785 "AUTO": lambda self: self._parse_auto_property(), 786 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 787 "BACKUP": lambda self: self.expression( 788 exp.BackupProperty, this=self._parse_var(any_token=True) 789 ), 790 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 791 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 792 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 793 "CHECKSUM": lambda self: self._parse_checksum(), 794 "CLUSTER BY": lambda self: self._parse_cluster(), 795 "CLUSTERED": lambda self: self._parse_clustered_by(), 796 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 797 exp.CollateProperty, **kwargs 798 ), 799 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 800 "CONTAINS": lambda self: self._parse_contains_property(), 801 "COPY": lambda self: self._parse_copy_property(), 802 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 803 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 804 "DEFINER": lambda self: self._parse_definer(), 805 "DETERMINISTIC": lambda self: self.expression( 806 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 807 ), 808 "DISTKEY": lambda self: self._parse_distkey(), 809 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 810 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 811 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 812 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 813 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 814 "FORMAT": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 815 "FREESPACE": lambda self: self._parse_freespace(), 816 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 817 "HEAP": lambda self: self.expression(exp.HeapProperty), 818 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 819 "IMMUTABLE": lambda self: self.expression( 820 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 821 ), 822 "INHERITS": lambda self: self.expression( 823 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 824 ), 825 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 826 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 827 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 828 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 829 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 830 "LIKE": lambda self: self._parse_create_like(), 831 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 832 "LOCK": lambda self: self._parse_locking(), 833 "LOCKING": lambda self: self._parse_locking(), 834 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 835 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 836 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 837 "MODIFIES": lambda self: self._parse_modifies_property(), 838 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 839 "NO": lambda self: self._parse_no_property(), 840 "ON": lambda self: self._parse_on_property(), 841 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 842 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 843 "PARTITION": lambda self: self._parse_partitioned_of(), 844 "PARTITION BY": lambda self: self._parse_partitioned_by(), 845 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 846 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 847 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 848 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 849 "READS": lambda self: self._parse_reads_property(), 850 "REMOTE": lambda self: self._parse_remote_with_connection(), 851 "RETURNS": lambda self: self._parse_returns(), 852 "STRICT": lambda self: self.expression(exp.StrictProperty), 853 "ROW": lambda self: self._parse_row(), 854 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 855 "SAMPLE": lambda self: self.expression( 856 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 857 ), 858 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 859 "SETTINGS": lambda self: self.expression( 860 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 861 ), 862 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 863 "SORTKEY": lambda self: self._parse_sortkey(), 864 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 865 "STABLE": lambda self: self.expression( 866 exp.StabilityProperty, this=exp.Literal.string("STABLE") 867 ), 868 "STORED": lambda self: self._parse_stored(), 869 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 870 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 871 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 872 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 873 "TO": lambda 
self: self._parse_to_table(), 874 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 875 "TRANSFORM": lambda self: self.expression( 876 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 877 ), 878 "TTL": lambda self: self._parse_ttl(), 879 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 880 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 881 "VOLATILE": lambda self: self._parse_volatile_property(), 882 "WITH": lambda self: self._parse_with_property(), 883 } 884 885 CONSTRAINT_PARSERS = { 886 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 887 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 888 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 889 "CHARACTER SET": lambda self: self.expression( 890 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 891 ), 892 "CHECK": lambda self: self.expression( 893 exp.CheckColumnConstraint, 894 this=self._parse_wrapped(self._parse_conjunction), 895 enforced=self._match_text_seq("ENFORCED"), 896 ), 897 "COLLATE": lambda self: self.expression( 898 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 899 ), 900 "COMMENT": lambda self: self.expression( 901 exp.CommentColumnConstraint, this=self._parse_string() 902 ), 903 "COMPRESS": lambda self: self._parse_compress(), 904 "CLUSTERED": lambda self: self.expression( 905 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 906 ), 907 "NONCLUSTERED": lambda self: self.expression( 908 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 909 ), 910 "DEFAULT": lambda self: self.expression( 911 exp.DefaultColumnConstraint, this=self._parse_bitwise() 912 ), 913 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 914 "EPHEMERAL": lambda self: self.expression( 915 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 916 ), 917 "EXCLUDE": lambda self: self.expression( 918 exp.ExcludeColumnConstraint, this=self._parse_index_params() 919 ), 920 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 921 "FORMAT": lambda self: self.expression( 922 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 923 ), 924 "GENERATED": lambda self: self._parse_generated_as_identity(), 925 "IDENTITY": lambda self: self._parse_auto_increment(), 926 "INLINE": lambda self: self._parse_inline(), 927 "LIKE": lambda self: self._parse_create_like(), 928 "NOT": lambda self: self._parse_not_constraint(), 929 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 930 "ON": lambda self: ( 931 self._match(TokenType.UPDATE) 932 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 933 ) 934 or self.expression(exp.OnProperty, this=self._parse_id_var()), 935 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 936 "PERIOD": lambda self: self._parse_period_for_system_time(), 937 "PRIMARY KEY": lambda self: self._parse_primary_key(), 938 "REFERENCES": lambda self: self._parse_references(match=False), 939 "TITLE": lambda self: self.expression( 940 exp.TitleColumnConstraint, this=self._parse_var_or_string() 941 ), 942 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 943 "UNIQUE": lambda self: self._parse_unique(), 944 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 945 "WITH": lambda self: 
self.expression( 946 exp.Properties, expressions=self._parse_wrapped_properties() 947 ), 948 } 949 950 ALTER_PARSERS = { 951 "ADD": lambda self: self._parse_alter_table_add(), 952 "ALTER": lambda self: self._parse_alter_table_alter(), 953 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 954 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 955 "DROP": lambda self: self._parse_alter_table_drop(), 956 "RENAME": lambda self: self._parse_alter_table_rename(), 957 "SET": lambda self: self._parse_alter_table_set(), 958 } 959 960 ALTER_ALTER_PARSERS = { 961 "DISTKEY": lambda self: self._parse_alter_diststyle(), 962 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 963 "SORTKEY": lambda self: self._parse_alter_sortkey(), 964 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 965 } 966 967 SCHEMA_UNNAMED_CONSTRAINTS = { 968 "CHECK", 969 "EXCLUDE", 970 "FOREIGN KEY", 971 "LIKE", 972 "PERIOD", 973 "PRIMARY KEY", 974 "UNIQUE", 975 } 976 977 NO_PAREN_FUNCTION_PARSERS = { 978 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 979 "CASE": lambda self: self._parse_case(), 980 "IF": lambda self: self._parse_if(), 981 "NEXT": lambda self: self._parse_next_value_for(), 982 } 983 984 INVALID_FUNC_NAME_TOKENS = { 985 TokenType.IDENTIFIER, 986 TokenType.STRING, 987 } 988 989 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 990 991 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 992 993 FUNCTION_PARSERS = { 994 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 995 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 996 "DECODE": lambda self: self._parse_decode(), 997 "EXTRACT": lambda self: self._parse_extract(), 998 "JSON_OBJECT": lambda self: self._parse_json_object(), 999 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1000 "JSON_TABLE": lambda self: self._parse_json_table(), 1001 "MATCH": lambda self: self._parse_match_against(), 1002 "OPENJSON": lambda self: self._parse_open_json(), 1003 "POSITION": lambda self: self._parse_position(), 1004 "PREDICT": lambda self: self._parse_predict(), 1005 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1006 "STRING_AGG": lambda self: self._parse_string_agg(), 1007 "SUBSTRING": lambda self: self._parse_substring(), 1008 "TRIM": lambda self: self._parse_trim(), 1009 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1010 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1011 } 1012 1013 QUERY_MODIFIER_PARSERS = { 1014 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1015 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1016 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1017 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1018 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1019 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1020 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1021 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1022 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1023 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1024 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1025 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1026 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1027 TokenType.TABLE_SAMPLE: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)), 1028 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1029 TokenType.CLUSTER_BY: lambda self: ( 1030 "cluster", 1031 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1032 ), 1033 TokenType.DISTRIBUTE_BY: lambda self: ( 1034 "distribute", 1035 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1036 ), 1037 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1038 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1039 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1040 } 1041 1042 SET_PARSERS = { 1043 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1044 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1045 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1046 "TRANSACTION": lambda self: self._parse_set_transaction(), 1047 } 1048 1049 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1050 1051 TYPE_LITERAL_PARSERS = { 1052 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1053 } 1054 1055 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1056 1057 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1058 1059 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1060 1061 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1062 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1063 "ISOLATION": ( 1064 ("LEVEL", "REPEATABLE", "READ"), 1065 ("LEVEL", "READ", "COMMITTED"), 1066 ("LEVEL", "READ", "UNCOMMITTED"), 1067 ("LEVEL", "SERIALIZABLE"), 1068 ), 1069 "READ": ("WRITE", "ONLY"), 1070 } 1071 1072 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1073 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1074 ) 1075 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1076 1077 CREATE_SEQUENCE: OPTIONS_TYPE = { 1078 "SCALE": ("EXTEND", "NOEXTEND"), 1079 "SHARD": ("EXTEND", "NOEXTEND"), 1080 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1081 **dict.fromkeys( 1082 ( 1083 "SESSION", 1084 "GLOBAL", 1085 "KEEP", 1086 "NOKEEP", 1087 "ORDER", 1088 "NOORDER", 1089 "NOCACHE", 1090 "CYCLE", 1091 "NOCYCLE", 1092 "NOMINVALUE", 1093 "NOMAXVALUE", 1094 "NOSCALE", 1095 "NOSHARD", 1096 ), 1097 tuple(), 1098 ), 1099 } 1100 1101 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1102 1103 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1104 1105 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1106 1107 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1108 1109 CLONE_KEYWORDS = {"CLONE", "COPY"} 1110 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1111 1112 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1113 1114 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1115 1116 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1117 1118 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1119 1120 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1121 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1122 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1123 1124 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1125 1126 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1127 1128
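# The class-level tables above (FUNCTIONS, the *_PARSERS maps, the token sets and
# the OPTIONS_TYPE tables) are the hook points that dialects override to change
# parsing behavior. A minimal sketch, assuming a hypothetical dialect with a
# made-up MY_UPPER function (neither name is part of sqlglot):
#
#     from sqlglot import exp, parser
#     from sqlglot.dialects.dialect import Dialect
#     from sqlglot.helper import seq_get
#
#     class MyDialect(Dialect):
#         class Parser(parser.Parser):
#             FUNCTIONS = {
#                 **parser.Parser.FUNCTIONS,
#                 "MY_UPPER": lambda args: exp.Upper(this=seq_get(args, 0)),
#             }
#
# With such a dialect, parsing MY_UPPER(x) would yield an exp.Upper node instead
# of a generic exp.Anonymous function.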
ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1129 1130 DISTINCT_TOKENS = {TokenType.DISTINCT} 1131 1132 NULL_TOKENS = {TokenType.NULL} 1133 1134 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1135 1136 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1137 1138 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1139 1140 STRICT_CAST = True 1141 1142 PREFIXED_PIVOT_COLUMNS = False 1143 IDENTIFY_PIVOT_STRINGS = False 1144 1145 LOG_DEFAULTS_TO_LN = False 1146 1147 # Whether ADD is present for each column added by ALTER TABLE 1148 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1149 1150 # Whether the table sample clause expects CSV syntax 1151 TABLESAMPLE_CSV = False 1152 1153 # The default method used for table sampling 1154 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1155 1156 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1157 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1158 1159 # Whether the TRIM function expects the characters to trim as its first argument 1160 TRIM_PATTERN_FIRST = False 1161 1162 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1163 STRING_ALIASES = False 1164 1165 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1166 MODIFIERS_ATTACHED_TO_UNION = True 1167 UNION_MODIFIERS = {"order", "limit", "offset"} 1168 1169 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1170 NO_PAREN_IF_COMMANDS = True 1171 1172 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1173 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1174 1175 # Whether the `:` operator is used to extract a value from a JSON document 1176 COLON_IS_JSON_EXTRACT = False 1177 1178 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1179 # If this is True and '(' is not found, the keyword will be treated as an identifier 1180 VALUES_FOLLOWED_BY_PAREN = True 1181 1182 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1183 SUPPORTS_IMPLICIT_UNNEST = False 1184 1185 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1186 INTERVAL_SPANS = True 1187 1188 # Whether a PARTITION clause can follow a table reference 1189 SUPPORTS_PARTITION_SELECTION = False 1190 1191 __slots__ = ( 1192 "error_level", 1193 "error_message_context", 1194 "max_errors", 1195 "dialect", 1196 "sql", 1197 "errors", 1198 "_tokens", 1199 "_index", 1200 "_curr", 1201 "_next", 1202 "_prev", 1203 "_prev_comments", 1204 ) 1205 1206 # Autofilled 1207 SHOW_TRIE: t.Dict = {} 1208 SET_TRIE: t.Dict = {} 1209 1210 def __init__( 1211 self, 1212 error_level: t.Optional[ErrorLevel] = None, 1213 error_message_context: int = 100, 1214 max_errors: int = 3, 1215 dialect: DialectType = None, 1216 ): 1217 from sqlglot.dialects import Dialect 1218 1219 self.error_level = error_level or ErrorLevel.IMMEDIATE 1220 self.error_message_context = error_message_context 1221 self.max_errors = max_errors 1222 self.dialect = Dialect.get_or_raise(dialect) 1223 self.reset() 1224 1225 def reset(self): 1226 self.sql = "" 1227 self.errors = [] 1228 self._tokens = [] 1229 self._index = 0 1230 self._curr = None 1231 self._next = None 1232 self._prev = None 1233 self._prev_comments = None 1234 1235 def parse( 1236 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1237 ) -> t.List[t.Optional[exp.Expression]]: 1238 """ 1239 Parses a list of tokens and returns a list of syntax trees, one tree 1240 per parsed SQL statement. 1241 1242 Args: 1243 raw_tokens: The list of tokens. 1244 sql: The original SQL string, used to produce helpful debug messages. 1245 1246 Returns: 1247 The list of the produced syntax trees. 1248 """ 1249 return self._parse( 1250 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1251 ) 1252 1253 def parse_into( 1254 self, 1255 expression_types: exp.IntoType, 1256 raw_tokens: t.List[Token], 1257 sql: t.Optional[str] = None, 1258 ) -> t.List[t.Optional[exp.Expression]]: 1259 """ 1260 Parses a list of tokens into a given Expression type. If a collection of Expression 1261 types is given instead, this method will try to parse the token list into each one 1262 of them, stopping at the first for which the parsing succeeds. 1263 1264 Args: 1265 expression_types: The expression type(s) to try and parse the token list into. 1266 raw_tokens: The list of tokens. 1267 sql: The original SQL string, used to produce helpful debug messages. 1268 1269 Returns: 1270 The target Expression. 
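Example:
    A rough, illustrative sketch (default dialect; the SQL fragment is arbitrary):

        from sqlglot import exp
        from sqlglot.parser import Parser
        from sqlglot.tokens import Tokenizer

        parser = Parser()
        tokens = Tokenizer().tokenize("a = 1")
        parser.parse_into(exp.Condition, tokens, sql="a = 1")
        # -> a one-element list containing an exp.EQ node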
1271 """ 1272 errors = [] 1273 for expression_type in ensure_list(expression_types): 1274 parser = self.EXPRESSION_PARSERS.get(expression_type) 1275 if not parser: 1276 raise TypeError(f"No parser registered for {expression_type}") 1277 1278 try: 1279 return self._parse(parser, raw_tokens, sql) 1280 except ParseError as e: 1281 e.errors[0]["into_expression"] = expression_type 1282 errors.append(e) 1283 1284 raise ParseError( 1285 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1286 errors=merge_errors(errors), 1287 ) from errors[-1] 1288 1289 def _parse( 1290 self, 1291 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1292 raw_tokens: t.List[Token], 1293 sql: t.Optional[str] = None, 1294 ) -> t.List[t.Optional[exp.Expression]]: 1295 self.reset() 1296 self.sql = sql or "" 1297 1298 total = len(raw_tokens) 1299 chunks: t.List[t.List[Token]] = [[]] 1300 1301 for i, token in enumerate(raw_tokens): 1302 if token.token_type == TokenType.SEMICOLON: 1303 if token.comments: 1304 chunks.append([token]) 1305 1306 if i < total - 1: 1307 chunks.append([]) 1308 else: 1309 chunks[-1].append(token) 1310 1311 expressions = [] 1312 1313 for tokens in chunks: 1314 self._index = -1 1315 self._tokens = tokens 1316 self._advance() 1317 1318 expressions.append(parse_method(self)) 1319 1320 if self._index < len(self._tokens): 1321 self.raise_error("Invalid expression / Unexpected token") 1322 1323 self.check_errors() 1324 1325 return expressions 1326 1327 def check_errors(self) -> None: 1328 """Logs or raises any found errors, depending on the chosen error level setting.""" 1329 if self.error_level == ErrorLevel.WARN: 1330 for error in self.errors: 1331 logger.error(str(error)) 1332 elif self.error_level == ErrorLevel.RAISE and self.errors: 1333 raise ParseError( 1334 concat_messages(self.errors, self.max_errors), 1335 errors=merge_errors(self.errors), 1336 ) 1337 1338 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1339 """ 1340 Appends an error in the list of recorded errors or raises it, depending on the chosen 1341 error level setting. 1342 """ 1343 token = token or self._curr or self._prev or Token.string("") 1344 start = token.start 1345 end = token.end + 1 1346 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1347 highlight = self.sql[start:end] 1348 end_context = self.sql[end : end + self.error_message_context] 1349 1350 error = ParseError.new( 1351 f"{message}. Line {token.line}, Col: {token.col}.\n" 1352 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1353 description=message, 1354 line=token.line, 1355 col=token.col, 1356 start_context=start_context, 1357 highlight=highlight, 1358 end_context=end_context, 1359 ) 1360 1361 if self.error_level == ErrorLevel.IMMEDIATE: 1362 raise error 1363 1364 self.errors.append(error) 1365 1366 def expression( 1367 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1368 ) -> E: 1369 """ 1370 Creates a new, validated Expression. 1371 1372 Args: 1373 exp_class: The expression class to instantiate. 1374 comments: An optional list of comments to attach to the expression. 1375 kwargs: The arguments to set for the expression along with their respective values. 1376 1377 Returns: 1378 The target expression. 
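Example:
    An illustrative sketch (any Parser instance would do), building a negated
    column reference by hand:

        from sqlglot import exp
        from sqlglot.parser import Parser

        parser = Parser()
        parser.expression(exp.Not, this=exp.column("x"))

    The result is equivalent to exp.Not(this=exp.column("x")), except that the
    node is also validated and any comments pending on the parser state are
    attached to it.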
1379 """ 1380 instance = exp_class(**kwargs) 1381 instance.add_comments(comments) if comments else self._add_comments(instance) 1382 return self.validate_expression(instance) 1383 1384 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1385 if expression and self._prev_comments: 1386 expression.add_comments(self._prev_comments) 1387 self._prev_comments = None 1388 1389 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1390 """ 1391 Validates an Expression, making sure that all its mandatory arguments are set. 1392 1393 Args: 1394 expression: The expression to validate. 1395 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1396 1397 Returns: 1398 The validated expression. 1399 """ 1400 if self.error_level != ErrorLevel.IGNORE: 1401 for error_message in expression.error_messages(args): 1402 self.raise_error(error_message) 1403 1404 return expression 1405 1406 def _find_sql(self, start: Token, end: Token) -> str: 1407 return self.sql[start.start : end.end + 1] 1408 1409 def _is_connected(self) -> bool: 1410 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1411 1412 def _advance(self, times: int = 1) -> None: 1413 self._index += times 1414 self._curr = seq_get(self._tokens, self._index) 1415 self._next = seq_get(self._tokens, self._index + 1) 1416 1417 if self._index > 0: 1418 self._prev = self._tokens[self._index - 1] 1419 self._prev_comments = self._prev.comments 1420 else: 1421 self._prev = None 1422 self._prev_comments = None 1423 1424 def _retreat(self, index: int) -> None: 1425 if index != self._index: 1426 self._advance(index - self._index) 1427 1428 def _warn_unsupported(self) -> None: 1429 if len(self._tokens) <= 1: 1430 return 1431 1432 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1433 # interested in emitting a warning for the one being currently processed. 1434 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1435 1436 logger.warning( 1437 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1438 ) 1439 1440 def _parse_command(self) -> exp.Command: 1441 self._warn_unsupported() 1442 return self.expression( 1443 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1444 ) 1445 1446 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1447 """ 1448 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
This behavior can 1449 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1450 the parser state accordingly. 1451 """ 1452 index = self._index 1453 error_level = self.error_level 1454 1455 self.error_level = ErrorLevel.IMMEDIATE 1456 try: 1457 this = parse_method() 1458 except ParseError: 1459 this = None 1460 finally: 1461 if not this or retreat: 1462 self._retreat(index) 1463 self.error_level = error_level 1464 1465 return this 1466 1467 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1468 start = self._prev 1469 exists = self._parse_exists() if allow_exists else None 1470 1471 self._match(TokenType.ON) 1472 1473 materialized = self._match_text_seq("MATERIALIZED") 1474 kind = self._match_set(self.CREATABLES) and self._prev 1475 if not kind: 1476 return self._parse_as_command(start) 1477 1478 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1479 this = self._parse_user_defined_function(kind=kind.token_type) 1480 elif kind.token_type == TokenType.TABLE: 1481 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1482 elif kind.token_type == TokenType.COLUMN: 1483 this = self._parse_column() 1484 else: 1485 this = self._parse_id_var() 1486 1487 self._match(TokenType.IS) 1488 1489 return self.expression( 1490 exp.Comment, 1491 this=this, 1492 kind=kind.text, 1493 expression=self._parse_string(), 1494 exists=exists, 1495 materialized=materialized, 1496 ) 1497 1498 def _parse_to_table( 1499 self, 1500 ) -> exp.ToTableProperty: 1501 table = self._parse_table_parts(schema=True) 1502 return self.expression(exp.ToTableProperty, this=table) 1503 1504 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1505 def _parse_ttl(self) -> exp.Expression: 1506 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1507 this = self._parse_bitwise() 1508 1509 if self._match_text_seq("DELETE"): 1510 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1511 if self._match_text_seq("RECOMPRESS"): 1512 return self.expression( 1513 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1514 ) 1515 if self._match_text_seq("TO", "DISK"): 1516 return self.expression( 1517 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1518 ) 1519 if self._match_text_seq("TO", "VOLUME"): 1520 return self.expression( 1521 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1522 ) 1523 1524 return this 1525 1526 expressions = self._parse_csv(_parse_ttl_action) 1527 where = self._parse_where() 1528 group = self._parse_group() 1529 1530 aggregates = None 1531 if group and self._match(TokenType.SET): 1532 aggregates = self._parse_csv(self._parse_set_item) 1533 1534 return self.expression( 1535 exp.MergeTreeTTL, 1536 expressions=expressions, 1537 where=where, 1538 group=group, 1539 aggregates=aggregates, 1540 ) 1541 1542 def _parse_statement(self) -> t.Optional[exp.Expression]: 1543 if self._curr is None: 1544 return None 1545 1546 if self._match_set(self.STATEMENT_PARSERS): 1547 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1548 1549 if self._match_set(self.dialect.tokenizer.COMMANDS): 1550 return self._parse_command() 1551 1552 expression = self._parse_expression() 1553 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1554 return self._parse_query_modifiers(expression) 1555 1556 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1557 start =
self._prev 1558 temporary = self._match(TokenType.TEMPORARY) 1559 materialized = self._match_text_seq("MATERIALIZED") 1560 1561 kind = self._match_set(self.CREATABLES) and self._prev.text 1562 if not kind: 1563 return self._parse_as_command(start) 1564 1565 if_exists = exists or self._parse_exists() 1566 table = self._parse_table_parts( 1567 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1568 ) 1569 1570 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1571 1572 if self._match(TokenType.L_PAREN, advance=False): 1573 expressions = self._parse_wrapped_csv(self._parse_types) 1574 else: 1575 expressions = None 1576 1577 return self.expression( 1578 exp.Drop, 1579 comments=start.comments, 1580 exists=if_exists, 1581 this=table, 1582 expressions=expressions, 1583 kind=kind.upper(), 1584 temporary=temporary, 1585 materialized=materialized, 1586 cascade=self._match_text_seq("CASCADE"), 1587 constraints=self._match_text_seq("CONSTRAINTS"), 1588 purge=self._match_text_seq("PURGE"), 1589 cluster=cluster, 1590 ) 1591 1592 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1593 return ( 1594 self._match_text_seq("IF") 1595 and (not not_ or self._match(TokenType.NOT)) 1596 and self._match(TokenType.EXISTS) 1597 ) 1598 1599 def _parse_create(self) -> exp.Create | exp.Command: 1600 # Note: this can't be None because we've matched a statement parser 1601 start = self._prev 1602 comments = self._prev_comments 1603 1604 replace = ( 1605 start.token_type == TokenType.REPLACE 1606 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1607 or self._match_pair(TokenType.OR, TokenType.ALTER) 1608 ) 1609 1610 unique = self._match(TokenType.UNIQUE) 1611 1612 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1613 self._advance() 1614 1615 properties = None 1616 create_token = self._match_set(self.CREATABLES) and self._prev 1617 1618 if not create_token: 1619 # exp.Properties.Location.POST_CREATE 1620 properties = self._parse_properties() 1621 create_token = self._match_set(self.CREATABLES) and self._prev 1622 1623 if not properties or not create_token: 1624 return self._parse_as_command(start) 1625 1626 exists = self._parse_exists(not_=True) 1627 this = None 1628 expression: t.Optional[exp.Expression] = None 1629 indexes = None 1630 no_schema_binding = None 1631 begin = None 1632 end = None 1633 clone = None 1634 1635 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1636 nonlocal properties 1637 if properties and temp_props: 1638 properties.expressions.extend(temp_props.expressions) 1639 elif temp_props: 1640 properties = temp_props 1641 1642 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1643 this = self._parse_user_defined_function(kind=create_token.token_type) 1644 1645 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1646 extend_props(self._parse_properties()) 1647 1648 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1649 extend_props(self._parse_properties()) 1650 1651 if not expression: 1652 if self._match(TokenType.COMMAND): 1653 expression = self._parse_as_command(self._prev) 1654 else: 1655 begin = self._match(TokenType.BEGIN) 1656 return_ = self._match_text_seq("RETURN") 1657 1658 if self._match(TokenType.STRING, advance=False): 1659 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1660 #
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1661 expression = self._parse_string() 1662 extend_props(self._parse_properties()) 1663 else: 1664 expression = self._parse_statement() 1665 1666 end = self._match_text_seq("END") 1667 1668 if return_: 1669 expression = self.expression(exp.Return, this=expression) 1670 elif create_token.token_type == TokenType.INDEX: 1671 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1672 if not self._match(TokenType.ON): 1673 index = self._parse_id_var() 1674 anonymous = False 1675 else: 1676 index = None 1677 anonymous = True 1678 1679 this = self._parse_index(index=index, anonymous=anonymous) 1680 elif create_token.token_type in self.DB_CREATABLES: 1681 table_parts = self._parse_table_parts( 1682 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1683 ) 1684 1685 # exp.Properties.Location.POST_NAME 1686 self._match(TokenType.COMMA) 1687 extend_props(self._parse_properties(before=True)) 1688 1689 this = self._parse_schema(this=table_parts) 1690 1691 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1692 extend_props(self._parse_properties()) 1693 1694 self._match(TokenType.ALIAS) 1695 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1696 # exp.Properties.Location.POST_ALIAS 1697 extend_props(self._parse_properties()) 1698 1699 if create_token.token_type == TokenType.SEQUENCE: 1700 expression = self._parse_types() 1701 extend_props(self._parse_properties()) 1702 else: 1703 expression = self._parse_ddl_select() 1704 1705 if create_token.token_type == TokenType.TABLE: 1706 # exp.Properties.Location.POST_EXPRESSION 1707 extend_props(self._parse_properties()) 1708 1709 indexes = [] 1710 while True: 1711 index = self._parse_index() 1712 1713 # exp.Properties.Location.POST_INDEX 1714 extend_props(self._parse_properties()) 1715 1716 if not index: 1717 break 1718 else: 1719 self._match(TokenType.COMMA) 1720 indexes.append(index) 1721 elif create_token.token_type == TokenType.VIEW: 1722 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1723 no_schema_binding = True 1724 1725 shallow = self._match_text_seq("SHALLOW") 1726 1727 if self._match_texts(self.CLONE_KEYWORDS): 1728 copy = self._prev.text.lower() == "copy" 1729 clone = self.expression( 1730 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1731 ) 1732 1733 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1734 return self._parse_as_command(start) 1735 1736 return self.expression( 1737 exp.Create, 1738 comments=comments, 1739 this=this, 1740 kind=create_token.text.upper(), 1741 replace=replace, 1742 unique=unique, 1743 expression=expression, 1744 exists=exists, 1745 properties=properties, 1746 indexes=indexes, 1747 no_schema_binding=no_schema_binding, 1748 begin=begin, 1749 end=end, 1750 clone=clone, 1751 ) 1752 1753 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1754 seq = exp.SequenceProperties() 1755 1756 options = [] 1757 index = self._index 1758 1759 while self._curr: 1760 self._match(TokenType.COMMA) 1761 if self._match_text_seq("INCREMENT"): 1762 self._match_text_seq("BY") 1763 self._match_text_seq("=") 1764 seq.set("increment", self._parse_term()) 1765 elif self._match_text_seq("MINVALUE"): 1766 seq.set("minvalue", self._parse_term()) 1767 elif self._match_text_seq("MAXVALUE"): 1768 seq.set("maxvalue", self._parse_term()) 1769 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1770 self._match_text_seq("=") 1771 seq.set("start", self._parse_term()) 1772 elif self._match_text_seq("CACHE"): 1773 # T-SQL allows empty CACHE which is initialized dynamically 1774 seq.set("cache", self._parse_number() or True) 1775 elif self._match_text_seq("OWNED", "BY"): 1776 # "OWNED BY NONE" is the default 1777 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1778 else: 1779 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1780 if opt: 1781 options.append(opt) 1782 else: 1783 break 1784 1785 seq.set("options", options if options else None) 1786 return None if self._index == index else seq 1787 1788 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1789 # only used for teradata currently 1790 self._match(TokenType.COMMA) 1791 1792 kwargs = { 1793 "no": self._match_text_seq("NO"), 1794 "dual": self._match_text_seq("DUAL"), 1795 "before": self._match_text_seq("BEFORE"), 1796 "default": self._match_text_seq("DEFAULT"), 1797 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1798 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1799 "after": self._match_text_seq("AFTER"), 1800 "minimum": self._match_texts(("MIN", "MINIMUM")), 1801 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1802 } 1803 1804 if self._match_texts(self.PROPERTY_PARSERS): 1805 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1806 try: 1807 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1808 except TypeError: 1809 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1810 1811 return None 1812 1813 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1814 return self._parse_wrapped_csv(self._parse_property) 1815 1816 def _parse_property(self) -> t.Optional[exp.Expression]: 1817 if self._match_texts(self.PROPERTY_PARSERS): 1818 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1819 1820 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1821 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1822 1823 if self._match_text_seq("COMPOUND", "SORTKEY"): 1824 return self._parse_sortkey(compound=True) 1825 1826 if self._match_text_seq("SQL", "SECURITY"): 1827 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1828 1829 index = self._index 1830 key = self._parse_column() 1831 1832 if not self._match(TokenType.EQ): 1833 self._retreat(index) 1834 return self._parse_sequence_properties() 1835 1836 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1837 if isinstance(key, exp.Column): 1838 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1839 1840 value = self._parse_bitwise() or self._parse_var(any_token=True) 1841 1842 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1843 if isinstance(value, exp.Column): 1844 value = exp.var(value.name) 1845 1846 return self.expression(exp.Property, this=key, value=value) 1847 1848 def _parse_stored(self) -> exp.FileFormatProperty: 1849 self._match(TokenType.ALIAS) 1850 1851 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1852 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1853 1854 return self.expression( 1855 exp.FileFormatProperty, 1856 this=( 1857 self.expression( 1858 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1859 ) 1860 if 
input_format or output_format 1861 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1862 ), 1863 ) 1864 1865 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1866 field = self._parse_field() 1867 if isinstance(field, exp.Identifier) and not field.quoted: 1868 field = exp.var(field) 1869 1870 return field 1871 1872 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1873 self._match(TokenType.EQ) 1874 self._match(TokenType.ALIAS) 1875 1876 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1877 1878 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1879 properties = [] 1880 while True: 1881 if before: 1882 prop = self._parse_property_before() 1883 else: 1884 prop = self._parse_property() 1885 if not prop: 1886 break 1887 for p in ensure_list(prop): 1888 properties.append(p) 1889 1890 if properties: 1891 return self.expression(exp.Properties, expressions=properties) 1892 1893 return None 1894 1895 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1896 return self.expression( 1897 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1898 ) 1899 1900 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1901 if self._index >= 2: 1902 pre_volatile_token = self._tokens[self._index - 2] 1903 else: 1904 pre_volatile_token = None 1905 1906 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1907 return exp.VolatileProperty() 1908 1909 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1910 1911 def _parse_retention_period(self) -> exp.Var: 1912 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1913 number = self._parse_number() 1914 number_str = f"{number} " if number else "" 1915 unit = self._parse_var(any_token=True) 1916 return exp.var(f"{number_str}{unit}") 1917 1918 def _parse_system_versioning_property( 1919 self, with_: bool = False 1920 ) -> exp.WithSystemVersioningProperty: 1921 self._match(TokenType.EQ) 1922 prop = self.expression( 1923 exp.WithSystemVersioningProperty, 1924 **{ # type: ignore 1925 "on": True, 1926 "with": with_, 1927 }, 1928 ) 1929 1930 if self._match_text_seq("OFF"): 1931 prop.set("on", False) 1932 return prop 1933 1934 self._match(TokenType.ON) 1935 if self._match(TokenType.L_PAREN): 1936 while self._curr and not self._match(TokenType.R_PAREN): 1937 if self._match_text_seq("HISTORY_TABLE", "="): 1938 prop.set("this", self._parse_table_parts()) 1939 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1940 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1941 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1942 prop.set("retention_period", self._parse_retention_period()) 1943 1944 self._match(TokenType.COMMA) 1945 1946 return prop 1947 1948 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1949 self._match(TokenType.EQ) 1950 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1951 prop = self.expression(exp.DataDeletionProperty, on=on) 1952 1953 if self._match(TokenType.L_PAREN): 1954 while self._curr and not self._match(TokenType.R_PAREN): 1955 if self._match_text_seq("FILTER_COLUMN", "="): 1956 prop.set("filter_column", self._parse_column()) 1957 elif self._match_text_seq("RETENTION_PERIOD", "="): 1958 prop.set("retention_period", self._parse_retention_period()) 1959 1960 
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

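    # Illustrative example (an assumption, not from the original source): Hive's
    # CLUSTERED BY ... INTO n BUCKETS clause is consumed by _parse_clustered_by
    # above and should produce an exp.ClusteredByProperty:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "CREATE TABLE t (a INT) CLUSTERED BY (a) INTO 4 BUCKETS"
    #     >>> sqlglot.parse_one(sql, read="hive").find(exp.ClusteredByProperty) is not None
    #     True
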
    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

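    # Illustrative example (an assumption based on the Postgres grammar linked above):
    # CREATE TABLE ... PARTITION OF ... FOR VALUES goes through _parse_partitioned_of
    # and _parse_partition_bound_spec, producing an exp.PartitionedOfProperty:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (1) TO (10)"
    #     >>> sqlglot.parse_one(sql, read="postgres").find(exp.PartitionedOfProperty) is not None
    #     True
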
2287 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2288 return exp.OnCommitProperty(delete=True) 2289 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2290 2291 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2292 if self._match_text_seq("SQL", "DATA"): 2293 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2294 return None 2295 2296 def _parse_distkey(self) -> exp.DistKeyProperty: 2297 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2298 2299 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2300 table = self._parse_table(schema=True) 2301 2302 options = [] 2303 while self._match_texts(("INCLUDING", "EXCLUDING")): 2304 this = self._prev.text.upper() 2305 2306 id_var = self._parse_id_var() 2307 if not id_var: 2308 return None 2309 2310 options.append( 2311 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2312 ) 2313 2314 return self.expression(exp.LikeProperty, this=table, expressions=options) 2315 2316 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2317 return self.expression( 2318 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2319 ) 2320 2321 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2322 self._match(TokenType.EQ) 2323 return self.expression( 2324 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2325 ) 2326 2327 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2328 self._match_text_seq("WITH", "CONNECTION") 2329 return self.expression( 2330 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2331 ) 2332 2333 def _parse_returns(self) -> exp.ReturnsProperty: 2334 value: t.Optional[exp.Expression] 2335 null = None 2336 is_table = self._match(TokenType.TABLE) 2337 2338 if is_table: 2339 if self._match(TokenType.LT): 2340 value = self.expression( 2341 exp.Schema, 2342 this="TABLE", 2343 expressions=self._parse_csv(self._parse_struct_types), 2344 ) 2345 if not self._match(TokenType.GT): 2346 self.raise_error("Expecting >") 2347 else: 2348 value = self._parse_schema(exp.var("TABLE")) 2349 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2350 null = True 2351 value = None 2352 else: 2353 value = self._parse_types() 2354 2355 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2356 2357 def _parse_describe(self) -> exp.Describe: 2358 kind = self._match_set(self.CREATABLES) and self._prev.text 2359 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2360 if self._match(TokenType.DOT): 2361 style = None 2362 self._retreat(self._index - 2) 2363 this = self._parse_table(schema=True) 2364 properties = self._parse_properties() 2365 expressions = properties.expressions if properties else None 2366 return self.expression( 2367 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2368 ) 2369 2370 def _parse_insert(self) -> exp.Insert: 2371 comments = ensure_list(self._prev_comments) 2372 hint = self._parse_hint() 2373 overwrite = self._match(TokenType.OVERWRITE) 2374 ignore = self._match(TokenType.IGNORE) 2375 local = self._match_text_seq("LOCAL") 2376 alternative = None 2377 is_function = None 2378 2379 if self._match_text_seq("DIRECTORY"): 2380 this: t.Optional[exp.Expression] = self.expression( 2381 exp.Directory, 2382 this=self._parse_var_or_string(), 2383 
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

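    # Illustrative example (an assumption, not from the original source): a Postgres
    # upsert is parsed by _parse_on_conflict into an exp.OnConflict carrying the
    # conflict keys and the DO UPDATE SET expressions:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO UPDATE SET a = 2"
    #     >>> conflict = sqlglot.parse_one(sql, read="postgres").find(exp.OnConflict)
    #     >>> [k.name for k in conflict.args["conflict_keys"]]
    #     ['a']
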
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

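    # Illustrative example (an assumption based on the MySQL multiple-table syntax
    # handled in _parse_delete above): the tables listed before FROM end up in the
    # "tables" arg of the exp.Delete node:
    #
    #     >>> import sqlglot
    #     >>> sql = "DELETE a, b FROM a INNER JOIN b ON a.id = b.id"
    #     >>> delete = sqlglot.parse_one(sql, read="mysql")
    #     >>> [t.name for t in delete.args["tables"]]
    #     ['a', 'b']
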
    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

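    # Illustrative sketch (an assumption, not from the original source): Spark's
    # CACHE statement is routed to _parse_cache, so a query like this should parse
    # into an exp.Cache wrapping the underlying SELECT:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("CACHE LAZY TABLE t AS SELECT * FROM src", read="spark")
    #     >>> isinstance(ast, exp.Cache), ast.args["lazy"]
    #     (True, True)
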
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

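    # Illustrative example (an assumption based on the "duckdb supports leading
    # with FROM x" comment in _parse_select above): a bare FROM query is
    # normalized into a SELECT *:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #     'SELECT * FROM tbl'
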
enumerate(this.args.get("joins") or []): 2831 table = join.this 2832 normalized_table = table.copy() 2833 normalized_table.meta["maybe_column"] = True 2834 normalized_table = _norm(normalized_table, dialect=self.dialect) 2835 2836 if isinstance(table, exp.Table) and not join.args.get("on"): 2837 if normalized_table.parts[0].name in refs: 2838 table_as_column = table.to_column() 2839 unnest = exp.Unnest(expressions=[table_as_column]) 2840 2841 # Table.to_column creates a parent Alias node that we want to convert to 2842 # a TableAlias and attach to the Unnest, so it matches the parser's output 2843 if isinstance(table.args.get("alias"), exp.TableAlias): 2844 table_as_column.replace(table_as_column.this) 2845 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2846 2847 table.replace(unnest) 2848 2849 refs.add(normalized_table.alias_or_name) 2850 2851 return this 2852 2853 def _parse_query_modifiers( 2854 self, this: t.Optional[exp.Expression] 2855 ) -> t.Optional[exp.Expression]: 2856 if isinstance(this, (exp.Query, exp.Table)): 2857 for join in self._parse_joins(): 2858 this.append("joins", join) 2859 for lateral in iter(self._parse_lateral, None): 2860 this.append("laterals", lateral) 2861 2862 while True: 2863 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2864 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2865 key, expression = parser(self) 2866 2867 if expression: 2868 this.set(key, expression) 2869 if key == "limit": 2870 offset = expression.args.pop("offset", None) 2871 2872 if offset: 2873 offset = exp.Offset(expression=offset) 2874 this.set("offset", offset) 2875 2876 limit_by_expressions = expression.expressions 2877 expression.set("expressions", None) 2878 offset.set("expressions", limit_by_expressions) 2879 continue 2880 break 2881 2882 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2883 this = self._implicit_unnests_to_explicit(this) 2884 2885 return this 2886 2887 def _parse_hint(self) -> t.Optional[exp.Hint]: 2888 if self._match(TokenType.HINT): 2889 hints = [] 2890 for hint in iter( 2891 lambda: self._parse_csv( 2892 lambda: self._parse_function() or self._parse_var(upper=True) 2893 ), 2894 [], 2895 ): 2896 hints.extend(hint) 2897 2898 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2899 self.raise_error("Expected */ after HINT") 2900 2901 return self.expression(exp.Hint, expressions=hints) 2902 2903 return None 2904 2905 def _parse_into(self) -> t.Optional[exp.Into]: 2906 if not self._match(TokenType.INTO): 2907 return None 2908 2909 temp = self._match(TokenType.TEMPORARY) 2910 unlogged = self._match_text_seq("UNLOGGED") 2911 self._match(TokenType.TABLE) 2912 2913 return self.expression( 2914 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2915 ) 2916 2917 def _parse_from( 2918 self, joins: bool = False, skip_from_token: bool = False 2919 ) -> t.Optional[exp.From]: 2920 if not skip_from_token and not self._match(TokenType.FROM): 2921 return None 2922 2923 return self.expression( 2924 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2925 ) 2926 2927 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2928 return self.expression( 2929 exp.MatchRecognizeMeasure, 2930 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2931 this=self._parse_expression(), 2932 ) 2933 2934 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2935 if not self._match(TokenType.MATCH_RECOGNIZE): 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

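    # Illustrative example (an assumption, not from the original source): _parse_join
    # turns a comma-separated table list into a plain exp.Join without a kind, so
    # both of the queries below produce exactly one join node:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> len(list(sqlglot.parse_one("SELECT * FROM a, b").find_all(exp.Join)))
    #     1
    #     >>> len(list(sqlglot.parse_one("SELECT * FROM a CROSS JOIN b").find_all(exp.Join)))
    #     1
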
    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

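    # Illustrative example (an assumption based on the T-SQL documentation link
    # above): table hints such as WITH (NOLOCK) are collected by _parse_table_hints
    # into the table's "hints" arg:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")
    #     >>> ast.find(exp.WithTableHint) is not None
    #     True
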
exp.Identifier(this="*") 3279 3280 # We bubble up comments from the Identifier to the Table 3281 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3282 3283 if is_db_reference: 3284 catalog = db 3285 db = table 3286 table = None 3287 3288 if not table and not is_db_reference: 3289 self.raise_error(f"Expected table name but got {self._curr}") 3290 if not db and is_db_reference: 3291 self.raise_error(f"Expected database name but got {self._curr}") 3292 3293 return self.expression( 3294 exp.Table, 3295 comments=comments, 3296 this=table, 3297 db=db, 3298 catalog=catalog, 3299 pivots=self._parse_pivots(), 3300 ) 3301 3302 def _parse_table( 3303 self, 3304 schema: bool = False, 3305 joins: bool = False, 3306 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3307 parse_bracket: bool = False, 3308 is_db_reference: bool = False, 3309 parse_partition: bool = False, 3310 ) -> t.Optional[exp.Expression]: 3311 lateral = self._parse_lateral() 3312 if lateral: 3313 return lateral 3314 3315 unnest = self._parse_unnest() 3316 if unnest: 3317 return unnest 3318 3319 values = self._parse_derived_table_values() 3320 if values: 3321 return values 3322 3323 subquery = self._parse_select(table=True) 3324 if subquery: 3325 if not subquery.args.get("pivots"): 3326 subquery.set("pivots", self._parse_pivots()) 3327 return subquery 3328 3329 bracket = parse_bracket and self._parse_bracket(None) 3330 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3331 3332 only = self._match(TokenType.ONLY) 3333 3334 this = t.cast( 3335 exp.Expression, 3336 bracket 3337 or self._parse_bracket( 3338 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3339 ), 3340 ) 3341 3342 if only: 3343 this.set("only", only) 3344 3345 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3346 self._match_text_seq("*") 3347 3348 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3349 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3350 this.set("partition", self._parse_partition()) 3351 3352 if schema: 3353 return self._parse_schema(this=this) 3354 3355 version = self._parse_version() 3356 3357 if version: 3358 this.set("version", version) 3359 3360 if self.dialect.ALIAS_POST_TABLESAMPLE: 3361 table_sample = self._parse_table_sample() 3362 3363 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3364 if alias: 3365 this.set("alias", alias) 3366 3367 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3368 return self.expression( 3369 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3370 ) 3371 3372 this.set("hints", self._parse_table_hints()) 3373 3374 if not this.args.get("pivots"): 3375 this.set("pivots", self._parse_pivots()) 3376 3377 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3378 table_sample = self._parse_table_sample() 3379 3380 if table_sample: 3381 table_sample.set("this", this) 3382 this = table_sample 3383 3384 if joins: 3385 for join in self._parse_joins(): 3386 this.append("joins", join) 3387 3388 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3389 this.set("ordinality", True) 3390 this.set("alias", self._parse_table_alias()) 3391 3392 return this 3393 3394 def _parse_version(self) -> t.Optional[exp.Version]: 3395 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3396 this = "TIMESTAMP" 3397 elif self._match(TokenType.VERSION_SNAPSHOT): 3398 this = "VERSION" 3399 else: 3400 return None 3401 3402 
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

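    # Illustrative example (an assumption, not from the original source): an UNNEST
    # call in the FROM clause is handled by _parse_unnest, with any alias columns
    # attached to the resulting exp.Unnest node:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "SELECT x FROM UNNEST(ARRAY[1, 2]) AS t(x)"
    #     >>> sqlglot.parse_one(sql, read="presto").find(exp.Unnest) is not None
    #     True
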
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

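    # Illustrative example (an assumption based on the DuckDB link above): the
    # simplified PIVOT form should be routed through _parse_simplified_pivot:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "PIVOT cities ON year USING SUM(population)"
    #     >>> sqlglot.parse_one(sql, read="duckdb").find(exp.Pivot) is not None
    #     True
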
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_conjunction()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

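    # Illustrative example (an assumption, not from the original source): _parse_group
    # collects ROLLUP/CUBE/GROUPING SETS into separate args of the exp.Group node:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)"
    #     >>> group = sqlglot.parse_one(sql).find(exp.Group)
    #     >>> bool(group.args.get("rollup"))
    #     True
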
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

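    # Illustrative example (an assumption, not from the original source): explicit
    # NULLS FIRST/LAST is recorded on each exp.Ordered; when absent, the dialect's
    # NULL_ORDERING setting decides the nulls_first flag (see _parse_ordered below):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ordered = sqlglot.parse_one("SELECT 1 ORDER BY x NULLS FIRST").find(exp.Ordered)
    #     >>> ordered.args["nulls_first"]
    #     True
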
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
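    # Example: MySQL's "LIMIT 10, 5" goes through the COMMA branch of _parse_limit
    # above, producing exp.Limit(offset=10, expression=5).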
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        this = self._parse_equality()

        if self._match(TokenType.COLON_EQ):
            this = self.expression(
                exp.PropertyEQ,
                this=this,
                comments=self._prev_comments,
                expression=self._parse_conjunction(),
            )

        while self._match_set(self.CONJUNCTION):
            this = self.expression(
                self.CONJUNCTION[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_equality(),
            )
        return this

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
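    # Roughly, binary operator precedence is encoded by this call chain, from loosest
    # to tightest binding: conjunction -> equality -> comparison -> range -> bitwise
    # -> term -> factor -> exponent/unary -> type/column/primary.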
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
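    # Examples: Postgres "x NOTNULL" parses to NOT (x IS NULL), and "x IS NOT DISTINCT
    # FROM y" parses to exp.NullSafeEQ (null-safe equality), per _parse_range and
    # _parse_is above.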
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
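    # Example of the canonicalization in _parse_interval above: "INTERVAL 5 day" and
    # "INTERVAL '5 day'" both become Interval(this='5' (a string literal), unit=Var(DAY)).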
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
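    # Example: in _parse_type above, "DATE '2020-01-01'" becomes
    # Cast(this='2020-01-01', to=DataType(DATE)), unless a TYPE_LITERAL_PARSERS entry
    # provides a dialect-specific construction for that type.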
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_conjunction) or None
            if values and not schema:
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTER.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
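    # Example: "ARRAY<INT>" and Postgres' "INT[]" both produce
    # DataType(this=ARRAY, expressions=[DataType(INT)], nested=True) via the branches above.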
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = (
            self._parse_type(parse_interval=False, fallback_to_identifier=True)
            or self._parse_id_var()
        )
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index
            path = self._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
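    # Example: with COLON_IS_JSON_EXTRACT set (e.g. Snowflake-style syntax),
    # "col:a.b::int" parses roughly as Cast(JSONExtract(col, <path a.b>), INT): the
    # casts are peeled off the path and re-applied around the extraction, per the
    # rearrangement above.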
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # BigQuery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
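    # Example: adjacent string literals such as "SELECT 'foo' 'bar'" are folded by
    # _parse_primary above into Concat(expressions=['foo', 'bar']).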
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
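    # Example: the ODBC escape "{fn CONCAT('a', 'b')}" is parsed by _parse_function
    # above exactly like a bare CONCAT('a', 'b') call, with the braces consumed around it.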
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
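    # Example: "(x, y) -> x + y" matches the LAMBDAS table in _parse_lambda above and
    # builds a Lambda with expressions=[x, y]; otherwise the parser retreats and treats
    # the parenthesized list as a regular expression.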
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
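    # Example: in _parse_column_def above, ClickHouse-style "a Int32 MATERIALIZED b + 1"
    # attaches a ComputedColumnConstraint with persisted=True to the column definition.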
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
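    # Example: "GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2)" sets
    # this=False, start=1 and increment=2 on the GeneratedAsIdentityColumnConstraint above.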
self.raise_error(f"No parser found for schema constraint {constraint}.") 5039 5040 return self.CONSTRAINT_PARSERS[constraint](self) 5041 5042 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5043 self._match_text_seq("KEY") 5044 return self.expression( 5045 exp.UniqueColumnConstraint, 5046 this=self._parse_schema(self._parse_id_var(any_token=False)), 5047 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5048 on_conflict=self._parse_on_conflict(), 5049 ) 5050 5051 def _parse_key_constraint_options(self) -> t.List[str]: 5052 options = [] 5053 while True: 5054 if not self._curr: 5055 break 5056 5057 if self._match(TokenType.ON): 5058 action = None 5059 on = self._advance_any() and self._prev.text 5060 5061 if self._match_text_seq("NO", "ACTION"): 5062 action = "NO ACTION" 5063 elif self._match_text_seq("CASCADE"): 5064 action = "CASCADE" 5065 elif self._match_text_seq("RESTRICT"): 5066 action = "RESTRICT" 5067 elif self._match_pair(TokenType.SET, TokenType.NULL): 5068 action = "SET NULL" 5069 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5070 action = "SET DEFAULT" 5071 else: 5072 self.raise_error("Invalid key constraint") 5073 5074 options.append(f"ON {on} {action}") 5075 elif self._match_text_seq("NOT", "ENFORCED"): 5076 options.append("NOT ENFORCED") 5077 elif self._match_text_seq("DEFERRABLE"): 5078 options.append("DEFERRABLE") 5079 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5080 options.append("INITIALLY DEFERRED") 5081 elif self._match_text_seq("NORELY"): 5082 options.append("NORELY") 5083 elif self._match_text_seq("MATCH", "FULL"): 5084 options.append("MATCH FULL") 5085 else: 5086 break 5087 5088 return options 5089 5090 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5091 if match and not self._match(TokenType.REFERENCES): 5092 return None 5093 5094 expressions = None 5095 this = self._parse_table(schema=True) 5096 options = self._parse_key_constraint_options() 5097 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5098 5099 def _parse_foreign_key(self) -> exp.ForeignKey: 5100 expressions = self._parse_wrapped_id_vars() 5101 reference = self._parse_references() 5102 options = {} 5103 5104 while self._match(TokenType.ON): 5105 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5106 self.raise_error("Expected DELETE or UPDATE") 5107 5108 kind = self._prev.text.lower() 5109 5110 if self._match_text_seq("NO", "ACTION"): 5111 action = "NO ACTION" 5112 elif self._match(TokenType.SET): 5113 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5114 action = "SET " + self._prev.text.upper() 5115 else: 5116 self._advance() 5117 action = self._prev.text.upper() 5118 5119 options[kind] = action 5120 5121 return self.expression( 5122 exp.ForeignKey, 5123 expressions=expressions, 5124 reference=reference, 5125 **options, # type: ignore 5126 ) 5127 5128 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5129 return self._parse_field() 5130 5131 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5132 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5133 self._retreat(self._index - 1) 5134 return None 5135 5136 id_vars = self._parse_wrapped_id_vars() 5137 return self.expression( 5138 exp.PeriodForSystemTimeConstraint, 5139 this=seq_get(id_vars, 0), 5140 expression=seq_get(id_vars, 1), 5141 ) 5142 5143 def _parse_primary_key( 5144 self, wrapped_optional: bool = False, in_props: bool = False 5145 ) -> 
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
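    # Note on the Interval check in _parse_case above: it recovers expressions ending in
    # "ELSE interval END", where "interval" is actually a column name and the END keyword
    # was mistakenly consumed as part of an INTERVAL expression.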
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
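    # Example: "CAST('01-2020' AS DATE FORMAT 'MM-YYYY')" takes the FORMAT branch of
    # _parse_cast above and is parsed into a StrToDate, with the format string mapped
    # through the dialect's FORMAT_MAPPING (or TIME_MAPPING) table.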
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
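    # Example for _parse_decode below: "DECODE(x, 1, 'one', 'other')" becomes
    # CASE WHEN x = 1 THEN 'one' ELSE 'other' END; a NULL search value is matched
    # with "x IS NULL" rather than with an equality comparison.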
    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
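    # Example: _parse_on_handling("NULL", "NULL", "ABSENT") above matches either
    # "NULL ON NULL" or "ABSENT ON NULL" and returns the matched text verbatim.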
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
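    # Example: MySQL's "MATCH (a, b) AGAINST ('term' IN BOOLEAN MODE)" yields
    # MatchAgainst(expressions=[a, b], this='term', modifier='IN BOOLEAN MODE').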
self._parse_string() 5546 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5547 5548 return self.expression( 5549 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5550 ) 5551 5552 expressions = None 5553 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5554 self._match_l_paren() 5555 expressions = self._parse_csv(_parse_open_json_column_def) 5556 5557 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5558 5559 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5560 args = self._parse_csv(self._parse_bitwise) 5561 5562 if self._match(TokenType.IN): 5563 return self.expression( 5564 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5565 ) 5566 5567 if haystack_first: 5568 haystack = seq_get(args, 0) 5569 needle = seq_get(args, 1) 5570 else: 5571 needle = seq_get(args, 0) 5572 haystack = seq_get(args, 1) 5573 5574 return self.expression( 5575 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5576 ) 5577 5578 def _parse_predict(self) -> exp.Predict: 5579 self._match_text_seq("MODEL") 5580 this = self._parse_table() 5581 5582 self._match(TokenType.COMMA) 5583 self._match_text_seq("TABLE") 5584 5585 return self.expression( 5586 exp.Predict, 5587 this=this, 5588 expression=self._parse_table(), 5589 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5590 ) 5591 5592 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5593 args = self._parse_csv(self._parse_table) 5594 return exp.JoinHint(this=func_name.upper(), expressions=args) 5595 5596 def _parse_substring(self) -> exp.Substring: 5597 # Postgres supports the form: substring(string [from int] [for int]) 5598 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5599 5600 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5601 5602 if self._match(TokenType.FROM): 5603 args.append(self._parse_bitwise()) 5604 if self._match(TokenType.FOR): 5605 if len(args) == 1: 5606 args.append(exp.Literal.number(1)) 5607 args.append(self._parse_bitwise()) 5608 5609 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5610 5611 def _parse_trim(self) -> exp.Trim: 5612 # https://www.w3resource.com/sql/character-functions/trim.php 5613 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5614 5615 position = None 5616 collation = None 5617 expression = None 5618 5619 if self._match_texts(self.TRIM_TYPES): 5620 position = self._prev.text.upper() 5621 5622 this = self._parse_bitwise() 5623 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5624 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5625 expression = self._parse_bitwise() 5626 5627 if invert_order: 5628 this, expression = expression, this 5629 5630 if self._match(TokenType.COLLATE): 5631 collation = self._parse_bitwise() 5632 5633 return self.expression( 5634 exp.Trim, this=this, position=position, expression=expression, collation=collation 5635 ) 5636 5637 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5638 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5639 5640 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5641 return self._parse_window(self._parse_id_var(), alias=True) 5642 5643 def _parse_respect_or_ignore_nulls( 5644 self, this: t.Optional[exp.Expression] 5645 ) -> t.Optional[exp.Expression]: 5646 if self._match_text_seq("IGNORE", "NULLS"): 
5647 return self.expression(exp.IgnoreNulls, this=this) 5648 if self._match_text_seq("RESPECT", "NULLS"): 5649 return self.expression(exp.RespectNulls, this=this) 5650 return this 5651 5652 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5653 if self._match(TokenType.HAVING): 5654 self._match_texts(("MAX", "MIN")) 5655 max = self._prev.text.upper() != "MIN" 5656 return self.expression( 5657 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5658 ) 5659 5660 return this 5661 5662 def _parse_window( 5663 self, this: t.Optional[exp.Expression], alias: bool = False 5664 ) -> t.Optional[exp.Expression]: 5665 func = this 5666 comments = func.comments if isinstance(func, exp.Expression) else None 5667 5668 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5669 self._match(TokenType.WHERE) 5670 this = self.expression( 5671 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5672 ) 5673 self._match_r_paren() 5674 5675 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5676 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5677 if self._match_text_seq("WITHIN", "GROUP"): 5678 order = self._parse_wrapped(self._parse_order) 5679 this = self.expression(exp.WithinGroup, this=this, expression=order) 5680 5681 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5682 # Some dialects choose to implement and some do not. 5683 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5684 5685 # There is some code above in _parse_lambda that handles 5686 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5687 5688 # The below changes handle 5689 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5690 5691 # Oracle allows both formats 5692 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5693 # and Snowflake chose to do the same for familiarity 5694 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5695 if isinstance(this, exp.AggFunc): 5696 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5697 5698 if ignore_respect and ignore_respect is not this: 5699 ignore_respect.replace(ignore_respect.this) 5700 this = self.expression(ignore_respect.__class__, this=this) 5701 5702 this = self._parse_respect_or_ignore_nulls(this) 5703 5704 # bigquery select from window x AS (partition by ...) 
5705 if alias: 5706 over = None 5707 self._match(TokenType.ALIAS) 5708 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5709 return this 5710 else: 5711 over = self._prev.text.upper() 5712 5713 if comments and isinstance(func, exp.Expression): 5714 func.pop_comments() 5715 5716 if not self._match(TokenType.L_PAREN): 5717 return self.expression( 5718 exp.Window, 5719 comments=comments, 5720 this=this, 5721 alias=self._parse_id_var(False), 5722 over=over, 5723 ) 5724 5725 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5726 5727 first = self._match(TokenType.FIRST) 5728 if self._match_text_seq("LAST"): 5729 first = False 5730 5731 partition, order = self._parse_partition_and_order() 5732 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5733 5734 if kind: 5735 self._match(TokenType.BETWEEN) 5736 start = self._parse_window_spec() 5737 self._match(TokenType.AND) 5738 end = self._parse_window_spec() 5739 5740 spec = self.expression( 5741 exp.WindowSpec, 5742 kind=kind, 5743 start=start["value"], 5744 start_side=start["side"], 5745 end=end["value"], 5746 end_side=end["side"], 5747 ) 5748 else: 5749 spec = None 5750 5751 self._match_r_paren() 5752 5753 window = self.expression( 5754 exp.Window, 5755 comments=comments, 5756 this=this, 5757 partition_by=partition, 5758 order=order, 5759 spec=spec, 5760 alias=window_alias, 5761 over=over, 5762 first=first, 5763 ) 5764 5765 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5766 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5767 return self._parse_window(window, alias=alias) 5768 5769 return window 5770 5771 def _parse_partition_and_order( 5772 self, 5773 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5774 return self._parse_partition_by(), self._parse_order() 5775 5776 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5777 self._match(TokenType.BETWEEN) 5778 5779 return { 5780 "value": ( 5781 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5782 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5783 or self._parse_bitwise() 5784 ), 5785 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5786 } 5787 5788 def _parse_alias( 5789 self, this: t.Optional[exp.Expression], explicit: bool = False 5790 ) -> t.Optional[exp.Expression]: 5791 any_token = self._match(TokenType.ALIAS) 5792 comments = self._prev_comments or [] 5793 5794 if explicit and not any_token: 5795 return this 5796 5797 if self._match(TokenType.L_PAREN): 5798 aliases = self.expression( 5799 exp.Aliases, 5800 comments=comments, 5801 this=this, 5802 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5803 ) 5804 self._match_r_paren(aliases) 5805 return aliases 5806 5807 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5808 self.STRING_ALIASES and self._parse_string_as_identifier() 5809 ) 5810 5811 if alias: 5812 comments.extend(alias.pop_comments()) 5813 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5814 column = this.this 5815 5816 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5817 if not this.comments and column and column.comments: 5818 this.comments = column.pop_comments() 5819 5820 return this 5821 5822 def _parse_id_var( 5823 self, 5824 any_token: bool = True, 5825 tokens: t.Optional[t.Collection[TokenType]] = None, 5826 ) -> t.Optional[exp.Expression]: 5827 expression = self._parse_identifier() 5828 if 
not expression and ( 5829 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5830 ): 5831 quoted = self._prev.token_type == TokenType.STRING 5832 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5833 5834 return expression 5835 5836 def _parse_string(self) -> t.Optional[exp.Expression]: 5837 if self._match_set(self.STRING_PARSERS): 5838 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5839 return self._parse_placeholder() 5840 5841 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5842 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5843 5844 def _parse_number(self) -> t.Optional[exp.Expression]: 5845 if self._match_set(self.NUMERIC_PARSERS): 5846 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5847 return self._parse_placeholder() 5848 5849 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5850 if self._match(TokenType.IDENTIFIER): 5851 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5852 return self._parse_placeholder() 5853 5854 def _parse_var( 5855 self, 5856 any_token: bool = False, 5857 tokens: t.Optional[t.Collection[TokenType]] = None, 5858 upper: bool = False, 5859 ) -> t.Optional[exp.Expression]: 5860 if ( 5861 (any_token and self._advance_any()) 5862 or self._match(TokenType.VAR) 5863 or (self._match_set(tokens) if tokens else False) 5864 ): 5865 return self.expression( 5866 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5867 ) 5868 return self._parse_placeholder() 5869 5870 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5871 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5872 self._advance() 5873 return self._prev 5874 return None 5875 5876 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5877 return self._parse_var() or self._parse_string() 5878 5879 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5880 return self._parse_primary() or self._parse_var(any_token=True) 5881 5882 def _parse_null(self) -> t.Optional[exp.Expression]: 5883 if self._match_set(self.NULL_TOKENS): 5884 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5885 return self._parse_placeholder() 5886 5887 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5888 if self._match(TokenType.TRUE): 5889 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5890 if self._match(TokenType.FALSE): 5891 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5892 return self._parse_placeholder() 5893 5894 def _parse_star(self) -> t.Optional[exp.Expression]: 5895 if self._match(TokenType.STAR): 5896 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5897 return self._parse_placeholder() 5898 5899 def _parse_parameter(self) -> exp.Parameter: 5900 this = self._parse_identifier() or self._parse_primary_or_var() 5901 return self.expression(exp.Parameter, this=this) 5902 5903 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5904 if self._match_set(self.PLACEHOLDER_PARSERS): 5905 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5906 if placeholder: 5907 return placeholder 5908 self._advance(-1) 5909 return None 5910 5911 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5912 if not self._match_texts(keywords): 5913 return None 5914 if self._match(TokenType.L_PAREN, advance=False): 5915 return 
self._parse_wrapped_csv(self._parse_expression) 5916 5917 expression = self._parse_expression() 5918 return [expression] if expression else None 5919 5920 def _parse_csv( 5921 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5922 ) -> t.List[exp.Expression]: 5923 parse_result = parse_method() 5924 items = [parse_result] if parse_result is not None else [] 5925 5926 while self._match(sep): 5927 self._add_comments(parse_result) 5928 parse_result = parse_method() 5929 if parse_result is not None: 5930 items.append(parse_result) 5931 5932 return items 5933 5934 def _parse_tokens( 5935 self, parse_method: t.Callable, expressions: t.Dict 5936 ) -> t.Optional[exp.Expression]: 5937 this = parse_method() 5938 5939 while self._match_set(expressions): 5940 this = self.expression( 5941 expressions[self._prev.token_type], 5942 this=this, 5943 comments=self._prev_comments, 5944 expression=parse_method(), 5945 ) 5946 5947 return this 5948 5949 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5950 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5951 5952 def _parse_wrapped_csv( 5953 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5954 ) -> t.List[exp.Expression]: 5955 return self._parse_wrapped( 5956 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5957 ) 5958 5959 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5960 wrapped = self._match(TokenType.L_PAREN) 5961 if not wrapped and not optional: 5962 self.raise_error("Expecting (") 5963 parse_result = parse_method() 5964 if wrapped: 5965 self._match_r_paren() 5966 return parse_result 5967 5968 def _parse_expressions(self) -> t.List[exp.Expression]: 5969 return self._parse_csv(self._parse_expression) 5970 5971 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5972 return self._parse_select() or self._parse_set_operations( 5973 self._parse_expression() if alias else self._parse_conjunction() 5974 ) 5975 5976 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5977 return self._parse_query_modifiers( 5978 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5979 ) 5980 5981 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5982 this = None 5983 if self._match_texts(self.TRANSACTION_KIND): 5984 this = self._prev.text 5985 5986 self._match_texts(("TRANSACTION", "WORK")) 5987 5988 modes = [] 5989 while True: 5990 mode = [] 5991 while self._match(TokenType.VAR): 5992 mode.append(self._prev.text) 5993 5994 if mode: 5995 modes.append(" ".join(mode)) 5996 if not self._match(TokenType.COMMA): 5997 break 5998 5999 return self.expression(exp.Transaction, this=this, modes=modes) 6000 6001 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6002 chain = None 6003 savepoint = None 6004 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6005 6006 self._match_texts(("TRANSACTION", "WORK")) 6007 6008 if self._match_text_seq("TO"): 6009 self._match_text_seq("SAVEPOINT") 6010 savepoint = self._parse_id_var() 6011 6012 if self._match(TokenType.AND): 6013 chain = not self._match_text_seq("NO") 6014 self._match_text_seq("CHAIN") 6015 6016 if is_rollback: 6017 return self.expression(exp.Rollback, savepoint=savepoint) 6018 6019 return self.expression(exp.Commit, chain=chain) 6020 6021 def _parse_refresh(self) -> exp.Refresh: 6022 self._match(TokenType.TABLE) 6023 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6024 6025 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6026 if not self._match_text_seq("ADD"): 6027 return None 6028 6029 self._match(TokenType.COLUMN) 6030 exists_column = self._parse_exists(not_=True) 6031 expression = self._parse_field_def() 6032 6033 if expression: 6034 expression.set("exists", exists_column) 6035 6036 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6037 if self._match_texts(("FIRST", "AFTER")): 6038 position = self._prev.text 6039 column_position = self.expression( 6040 exp.ColumnPosition, this=self._parse_column(), position=position 6041 ) 6042 expression.set("position", column_position) 6043 6044 return expression 6045 6046 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6047 drop = self._match(TokenType.DROP) and self._parse_drop() 6048 if drop and not isinstance(drop, exp.Command): 6049 drop.set("kind", drop.args.get("kind", "COLUMN")) 6050 return drop 6051 6052 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6053 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6054 return self.expression( 6055 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6056 ) 6057 6058 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6059 index = self._index - 1 6060 6061 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6062 return self._parse_csv( 6063 lambda: self.expression( 6064 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6065 ) 6066 ) 6067 6068 self._retreat(index) 6069 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6070 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6071 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6072 6073 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6074 if self._match_texts(self.ALTER_ALTER_PARSERS): 6075 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6076 6077 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6078 # keyword after ALTER we default to parsing this statement 6079 self._match(TokenType.COLUMN) 6080 column = self._parse_field(any_token=True) 6081 6082 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6083 return self.expression(exp.AlterColumn, this=column, drop=True) 6084 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6085 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6086 if self._match(TokenType.COMMENT): 6087 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6088 if self._match_text_seq("DROP", "NOT", "NULL"): 6089 return self.expression( 6090 exp.AlterColumn, 6091 this=column, 6092 drop=True, 6093 allow_null=True, 6094 ) 6095 if self._match_text_seq("SET", "NOT", "NULL"): 6096 return self.expression( 6097 exp.AlterColumn, 6098 this=column, 6099 allow_null=False, 6100 ) 6101 self._match_text_seq("SET", "DATA") 6102 self._match_text_seq("TYPE") 6103 return self.expression( 6104 exp.AlterColumn, 6105 this=column, 6106 dtype=self._parse_types(), 6107 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6108 using=self._match(TokenType.USING) and self._parse_conjunction(), 6109 ) 6110 6111 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6112 if self._match_texts(("ALL", "EVEN", "AUTO")): 6113 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6114 6115 self._match_text_seq("KEY", "DISTKEY") 6116 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6117 6118 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6119 if compound: 6120 self._match_text_seq("SORTKEY") 6121 6122 if self._match(TokenType.L_PAREN, advance=False): 6123 return self.expression( 6124 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6125 ) 6126 6127 self._match_texts(("AUTO", "NONE")) 6128 return self.expression( 6129 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6130 ) 6131 6132 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6133 index = self._index - 1 6134 6135 partition_exists = self._parse_exists() 6136 if self._match(TokenType.PARTITION, advance=False): 6137 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6138 6139 self._retreat(index) 6140 return self._parse_csv(self._parse_drop_column) 6141 6142 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6143 if self._match(TokenType.COLUMN): 6144 exists = self._parse_exists() 6145 old_column = self._parse_column() 6146 to = self._match_text_seq("TO") 6147 new_column = self._parse_column() 6148 6149 if old_column is None or to is None or new_column is None: 6150 return None 6151 6152 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6153 6154 self._match_text_seq("TO") 6155 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6156 6157 def _parse_alter_table_set(self) -> exp.AlterSet: 6158 alter_set = self.expression(exp.AlterSet) 6159 6160 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6161 "TABLE", "PROPERTIES" 6162 ): 6163 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6164 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6165 alter_set.set("expressions", [self._parse_conjunction()]) 6166 elif self._match_texts(("LOGGED", "UNLOGGED")): 6167 alter_set.set("option", exp.var(self._prev.text.upper())) 6168 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6169 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6170 elif self._match_text_seq("LOCATION"): 6171 alter_set.set("location", self._parse_field()) 6172 elif self._match_text_seq("ACCESS", "METHOD"): 6173 alter_set.set("access_method", self._parse_field()) 6174 elif self._match_text_seq("TABLESPACE"): 6175 alter_set.set("tablespace", self._parse_field()) 6176 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6177 alter_set.set("file_format", [self._parse_field()]) 6178 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6179 alter_set.set("file_format", self._parse_wrapped_options()) 6180 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6181 alter_set.set("copy_options", self._parse_wrapped_options()) 6182 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6183 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6184 else: 6185 if self._match_text_seq("SERDE"): 6186 alter_set.set("serde", self._parse_field()) 6187 6188 alter_set.set("expressions", [self._parse_properties()]) 6189 6190 return alter_set 6191 6192 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6193 start = self._prev 6194 6195 if not self._match(TokenType.TABLE): 6196 return 
self._parse_as_command(start) 6197 6198 exists = self._parse_exists() 6199 only = self._match_text_seq("ONLY") 6200 this = self._parse_table(schema=True) 6201 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6202 6203 if self._next: 6204 self._advance() 6205 6206 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6207 if parser: 6208 actions = ensure_list(parser(self)) 6209 options = self._parse_csv(self._parse_property) 6210 6211 if not self._curr and actions: 6212 return self.expression( 6213 exp.AlterTable, 6214 this=this, 6215 exists=exists, 6216 actions=actions, 6217 only=only, 6218 options=options, 6219 cluster=cluster, 6220 ) 6221 6222 return self._parse_as_command(start) 6223 6224 def _parse_merge(self) -> exp.Merge: 6225 self._match(TokenType.INTO) 6226 target = self._parse_table() 6227 6228 if target and self._match(TokenType.ALIAS, advance=False): 6229 target.set("alias", self._parse_table_alias()) 6230 6231 self._match(TokenType.USING) 6232 using = self._parse_table() 6233 6234 self._match(TokenType.ON) 6235 on = self._parse_conjunction() 6236 6237 return self.expression( 6238 exp.Merge, 6239 this=target, 6240 using=using, 6241 on=on, 6242 expressions=self._parse_when_matched(), 6243 ) 6244 6245 def _parse_when_matched(self) -> t.List[exp.When]: 6246 whens = [] 6247 6248 while self._match(TokenType.WHEN): 6249 matched = not self._match(TokenType.NOT) 6250 self._match_text_seq("MATCHED") 6251 source = ( 6252 False 6253 if self._match_text_seq("BY", "TARGET") 6254 else self._match_text_seq("BY", "SOURCE") 6255 ) 6256 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6257 6258 self._match(TokenType.THEN) 6259 6260 if self._match(TokenType.INSERT): 6261 _this = self._parse_star() 6262 if _this: 6263 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6264 else: 6265 then = self.expression( 6266 exp.Insert, 6267 this=self._parse_value(), 6268 expression=self._match_text_seq("VALUES") and self._parse_value(), 6269 ) 6270 elif self._match(TokenType.UPDATE): 6271 expressions = self._parse_star() 6272 if expressions: 6273 then = self.expression(exp.Update, expressions=expressions) 6274 else: 6275 then = self.expression( 6276 exp.Update, 6277 expressions=self._match(TokenType.SET) 6278 and self._parse_csv(self._parse_equality), 6279 ) 6280 elif self._match(TokenType.DELETE): 6281 then = self.expression(exp.Var, this=self._prev.text) 6282 else: 6283 then = None 6284 6285 whens.append( 6286 self.expression( 6287 exp.When, 6288 matched=matched, 6289 source=source, 6290 condition=condition, 6291 then=then, 6292 ) 6293 ) 6294 return whens 6295 6296 def _parse_show(self) -> t.Optional[exp.Expression]: 6297 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6298 if parser: 6299 return parser(self) 6300 return self._parse_as_command(self._prev) 6301 6302 def _parse_set_item_assignment( 6303 self, kind: t.Optional[str] = None 6304 ) -> t.Optional[exp.Expression]: 6305 index = self._index 6306 6307 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6308 return self._parse_set_transaction(global_=kind == "GLOBAL") 6309 6310 left = self._parse_primary() or self._parse_column() 6311 assignment_delimiter = self._match_texts(("=", "TO")) 6312 6313 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6314 self._retreat(index) 6315 return None 6316 6317 right = self._parse_statement() or self._parse_id_var() 6318 if isinstance(right, 
(exp.Column, exp.Identifier)): 6319 right = exp.var(right.name) 6320 6321 this = self.expression(exp.EQ, this=left, expression=right) 6322 return self.expression(exp.SetItem, this=this, kind=kind) 6323 6324 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6325 self._match_text_seq("TRANSACTION") 6326 characteristics = self._parse_csv( 6327 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6328 ) 6329 return self.expression( 6330 exp.SetItem, 6331 expressions=characteristics, 6332 kind="TRANSACTION", 6333 **{"global": global_}, # type: ignore 6334 ) 6335 6336 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6337 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6338 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6339 6340 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6341 index = self._index 6342 set_ = self.expression( 6343 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6344 ) 6345 6346 if self._curr: 6347 self._retreat(index) 6348 return self._parse_as_command(self._prev) 6349 6350 return set_ 6351 6352 def _parse_var_from_options( 6353 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6354 ) -> t.Optional[exp.Var]: 6355 start = self._curr 6356 if not start: 6357 return None 6358 6359 option = start.text.upper() 6360 continuations = options.get(option) 6361 6362 index = self._index 6363 self._advance() 6364 for keywords in continuations or []: 6365 if isinstance(keywords, str): 6366 keywords = (keywords,) 6367 6368 if self._match_text_seq(*keywords): 6369 option = f"{option} {' '.join(keywords)}" 6370 break 6371 else: 6372 if continuations or continuations is None: 6373 if raise_unmatched: 6374 self.raise_error(f"Unknown option {option}") 6375 6376 self._retreat(index) 6377 return None 6378 6379 return exp.var(option) 6380 6381 def _parse_as_command(self, start: Token) -> exp.Command: 6382 while self._curr: 6383 self._advance() 6384 text = self._find_sql(start, self._prev) 6385 size = len(start.text) 6386 self._warn_unsupported() 6387 return exp.Command(this=text[:size], expression=text[size:]) 6388 6389 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6390 settings = [] 6391 6392 self._match_l_paren() 6393 kind = self._parse_id_var() 6394 6395 if self._match(TokenType.L_PAREN): 6396 while True: 6397 key = self._parse_id_var() 6398 value = self._parse_primary() 6399 6400 if not key and value is None: 6401 break 6402 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6403 self._match(TokenType.R_PAREN) 6404 6405 self._match_r_paren() 6406 6407 return self.expression( 6408 exp.DictProperty, 6409 this=this, 6410 kind=kind.this if kind else None, 6411 settings=settings, 6412 ) 6413 6414 def _parse_dict_range(self, this: str) -> exp.DictRange: 6415 self._match_l_paren() 6416 has_min = self._match_text_seq("MIN") 6417 if has_min: 6418 min = self._parse_var() or self._parse_primary() 6419 self._match_text_seq("MAX") 6420 max = self._parse_var() or self._parse_primary() 6421 else: 6422 max = self._parse_var() or self._parse_primary() 6423 min = exp.Literal.number(0) 6424 self._match_r_paren() 6425 return self.expression(exp.DictRange, this=this, min=min, max=max) 6426 6427 def _parse_comprehension( 6428 self, this: t.Optional[exp.Expression] 6429 ) -> t.Optional[exp.Comprehension]: 6430 index = self._index 6431 expression = self._parse_column() 6432 if not 
self._match(TokenType.IN): 6433 self._retreat(index - 1) 6434 return None 6435 iterator = self._parse_column() 6436 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6437 return self.expression( 6438 exp.Comprehension, 6439 this=this, 6440 expression=expression, 6441 iterator=iterator, 6442 condition=condition, 6443 ) 6444 6445 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6446 if self._match(TokenType.HEREDOC_STRING): 6447 return self.expression(exp.Heredoc, this=self._prev.text) 6448 6449 if not self._match_text_seq("$"): 6450 return None 6451 6452 tags = ["$"] 6453 tag_text = None 6454 6455 if self._is_connected(): 6456 self._advance() 6457 tags.append(self._prev.text.upper()) 6458 else: 6459 self.raise_error("No closing $ found") 6460 6461 if tags[-1] != "$": 6462 if self._is_connected() and self._match_text_seq("$"): 6463 tag_text = tags[-1] 6464 tags.append("$") 6465 else: 6466 self.raise_error("No closing $ found") 6467 6468 heredoc_start = self._curr 6469 6470 while self._curr: 6471 if self._match_text_seq(*tags, advance=False): 6472 this = self._find_sql(heredoc_start, self._prev) 6473 self._advance(len(tags)) 6474 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6475 6476 self._advance() 6477 6478 self.raise_error(f"No closing {''.join(tags)} found") 6479 return None 6480 6481 def _find_parser( 6482 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6483 ) -> t.Optional[t.Callable]: 6484 if not self._curr: 6485 return None 6486 6487 index = self._index 6488 this = [] 6489 while True: 6490 # The current token might be multiple words 6491 curr = self._curr.text.upper() 6492 key = curr.split(" ") 6493 this.append(curr) 6494 6495 self._advance() 6496 result, trie = in_trie(trie, key) 6497 if result == TrieResult.FAILED: 6498 break 6499 6500 if result == TrieResult.EXISTS: 6501 subparser = parsers[" ".join(this)] 6502 return subparser 6503 6504 self._retreat(index) 6505 return None 6506 6507 def _match(self, token_type, advance=True, expression=None): 6508 if not self._curr: 6509 return None 6510 6511 if self._curr.token_type == token_type: 6512 if advance: 6513 self._advance() 6514 self._add_comments(expression) 6515 return True 6516 6517 return None 6518 6519 def _match_set(self, types, advance=True): 6520 if not self._curr: 6521 return None 6522 6523 if self._curr.token_type in types: 6524 if advance: 6525 self._advance() 6526 return True 6527 6528 return None 6529 6530 def _match_pair(self, token_type_a, token_type_b, advance=True): 6531 if not self._curr or not self._next: 6532 return None 6533 6534 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6535 if advance: 6536 self._advance(2) 6537 return True 6538 6539 return None 6540 6541 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6542 if not self._match(TokenType.L_PAREN, expression=expression): 6543 self.raise_error("Expecting (") 6544 6545 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6546 if not self._match(TokenType.R_PAREN, expression=expression): 6547 self.raise_error("Expecting )") 6548 6549 def _match_texts(self, texts, advance=True): 6550 if self._curr and self._curr.text.upper() in texts: 6551 if advance: 6552 self._advance() 6553 return True 6554 return None 6555 6556 def _match_text_seq(self, *texts, advance=True): 6557 index = self._index 6558 for text in texts: 6559 if self._curr and self._curr.text.upper() == text: 6560 self._advance() 6561 else: 6562 
self._retreat(index) 6563 return None 6564 6565 if not advance: 6566 self._retreat(index) 6567 6568 return True 6569 6570 def _replace_lambda( 6571 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6572 ) -> t.Optional[exp.Expression]: 6573 if not node: 6574 return node 6575 6576 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6577 6578 for column in node.find_all(exp.Column): 6579 typ = lambda_types.get(column.parts[0].name) 6580 if typ is not None: 6581 dot_or_id = column.to_dot() if column.table else column.this 6582 6583 if typ: 6584 dot_or_id = self.expression( 6585 exp.Cast, 6586 this=dot_or_id, 6587 to=typ, 6588 ) 6589 6590 parent = column.parent 6591 6592 while isinstance(parent, exp.Dot): 6593 if not isinstance(parent.parent, exp.Dot): 6594 parent.replace(dot_or_id) 6595 break 6596 parent = parent.parent 6597 else: 6598 if column is node: 6599 node = dot_or_id 6600 else: 6601 column.replace(dot_or_id) 6602 return node 6603 6604 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6605 start = self._prev 6606 6607 # Not to be confused with TRUNCATE(number, decimals) function call 6608 if self._match(TokenType.L_PAREN): 6609 self._retreat(self._index - 2) 6610 return self._parse_function() 6611 6612 # Clickhouse supports TRUNCATE DATABASE as well 6613 is_database = self._match(TokenType.DATABASE) 6614 6615 self._match(TokenType.TABLE) 6616 6617 exists = self._parse_exists(not_=False) 6618 6619 expressions = self._parse_csv( 6620 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6621 ) 6622 6623 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6624 6625 if self._match_text_seq("RESTART", "IDENTITY"): 6626 identity = "RESTART" 6627 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6628 identity = "CONTINUE" 6629 else: 6630 identity = None 6631 6632 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6633 option = self._prev.text 6634 else: 6635 option = None 6636 6637 partition = self._parse_partition() 6638 6639 # Fallback case 6640 if self._curr: 6641 return self._parse_as_command(start) 6642 6643 return self.expression( 6644 exp.TruncateTable, 6645 expressions=expressions, 6646 is_database=is_database, 6647 exists=exists, 6648 cluster=cluster, 6649 identity=identity, 6650 option=option, 6651 partition=partition, 6652 ) 6653 6654 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6655 this = self._parse_ordered(self._parse_opclass) 6656 6657 if not self._match(TokenType.WITH): 6658 return this 6659 6660 op = self._parse_var(any_token=True) 6661 6662 return self.expression(exp.WithOperator, this=this, op=op) 6663 6664 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6665 self._match(TokenType.EQ) 6666 self._match(TokenType.L_PAREN) 6667 6668 opts: t.List[t.Optional[exp.Expression]] = [] 6669 while self._curr and not self._match(TokenType.R_PAREN): 6670 if self._match_text_seq("FORMAT_NAME", "="): 6671 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6672 # so we parse it separately to use _parse_field() 6673 prop = self.expression( 6674 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6675 ) 6676 opts.append(prop) 6677 else: 6678 opts.append(self._parse_property()) 6679 6680 self._match(TokenType.COMMA) 6681 6682 return opts 6683 6684 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6685 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6686 6687 options = [] 6688 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6689 option = self._parse_var(any_token=True) 6690 prev = self._prev.text.upper() 6691 6692 # Different dialects might separate options and values by white space, "=" and "AS" 6693 self._match(TokenType.EQ) 6694 self._match(TokenType.ALIAS) 6695 6696 param = self.expression(exp.CopyParameter, this=option) 6697 6698 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6699 TokenType.L_PAREN, advance=False 6700 ): 6701 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6702 param.set("expressions", self._parse_wrapped_options()) 6703 elif prev == "FILE_FORMAT": 6704 # T-SQL's external file format case 6705 param.set("expression", self._parse_field()) 6706 else: 6707 param.set("expression", self._parse_unquoted_field()) 6708 6709 options.append(param) 6710 self._match(sep) 6711 6712 return options 6713 6714 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6715 expr = self.expression(exp.Credentials) 6716 6717 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6718 expr.set("storage", self._parse_field()) 6719 if self._match_text_seq("CREDENTIALS"): 6720 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6721 creds = ( 6722 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6723 ) 6724 expr.set("credentials", creds) 6725 if self._match_text_seq("ENCRYPTION"): 6726 expr.set("encryption", self._parse_wrapped_options()) 6727 if self._match_text_seq("IAM_ROLE"): 6728 expr.set("iam_role", self._parse_field()) 6729 if self._match_text_seq("REGION"): 6730 expr.set("region", self._parse_field()) 6731 6732 return expr 6733 6734 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6735 return self._parse_field() 6736 6737 def _parse_copy(self) -> exp.Copy | exp.Command: 6738 start = self._prev 6739 6740 self._match(TokenType.INTO) 6741 6742 this = ( 6743 self._parse_select(nested=True, parse_subquery_alias=False) 6744 if self._match(TokenType.L_PAREN, advance=False) 6745 else self._parse_table(schema=True) 6746 ) 6747 6748 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6749 6750 files = self._parse_csv(self._parse_file_location) 6751 credentials = self._parse_credentials() 6752 6753 self._match_text_seq("WITH") 6754 6755 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6756 6757 # Fallback case 6758 if self._curr: 6759 return self._parse_as_command(start) 6760 6761 return self.expression( 6762 exp.Copy, 6763 this=this, 6764 kind=kind, 6765 credentials=credentials, 6766 files=files, 6767 params=params, 6768 )
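The DECODE branch above compiles Oracle-style DECODE into a CASE expression, turning a literal NULL search into an IS NULL test rather than an equality check. A minimal sketch of how that surfaces through the public API; the rendered output shown in the comment is indicative, not guaranteed verbatim:

import sqlglot

# DECODE's search/result pairs become WHEN branches; the NULL search
# becomes "x IS NULL" rather than "x = NULL", per _parse_decode above.
sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t"
print(sqlglot.transpile(sql, read="oracle", write="spark")[0])
# e.g. SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END FROM t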
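_parse_window likewise normalizes the trailing-keyword form FIRST_VALUE(x) IGNORE NULLS OVER (...) by folding the modifier into an IgnoreNulls node that wraps the aggregate, as the inline comments explain. A quick check, assuming the default dialect accepts this form:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one(
    "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (PARTITION BY y ORDER BY z) FROM t"
)
window = tree.find(exp.Window)
# The IGNORE NULLS modifier was pulled inside the window's function argument
print(isinstance(window.this, exp.IgnoreNulls))  # True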
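Similarly, _parse_substring and _parse_trim accept the keyword forms (FROM/FOR, LEADING ... FROM) in addition to plain comma-separated argument lists, and both round-trip through the standard entry points:

import sqlglot

# Postgres' substring(string FROM start FOR length) form
print(sqlglot.parse_one("SELECT SUBSTRING(s FROM 2 FOR 3)", read="postgres").sql())
# Standard TRIM with a trim type and source expression
print(sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)").sql())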
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
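A short sketch of how these settings behave in practice; the malformed statement is deliberate and the exact message text may vary:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"  # dangling operator, guaranteed to upset the parser
tokens = Tokenizer().tokenize(sql)
try:
    Parser(error_level=ErrorLevel.RAISE, max_errors=1).parse(tokens, sql)
except ParseError as e:
    print(e)  # at most max_errors messages, each with error_message_context characters of context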
1210 def __init__( 1211 self, 1212 error_level: t.Optional[ErrorLevel] = None, 1213 error_message_context: int = 100, 1214 max_errors: int = 3, 1215 dialect: DialectType = None, 1216 ): 1217 from sqlglot.dialects import Dialect 1218 1219 self.error_level = error_level or ErrorLevel.IMMEDIATE 1220 self.error_message_context = error_message_context 1221 self.max_errors = max_errors 1222 self.dialect = Dialect.get_or_raise(dialect) 1223 self.reset()
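The dialect argument goes through Dialect.get_or_raise, so a dialect name, a Dialect instance, or None (the base dialect) are all accepted, while unknown names raise. A brief sketch:

from sqlglot.parser import Parser

Parser()                   # base dialect, ErrorLevel.IMMEDIATE
Parser(dialect="duckdb")   # name resolved via Dialect.get_or_raise
# Parser(dialect="no-such-dialect")  # would raise: unknown dialect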
1235 def parse( 1236 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1237 ) -> t.List[t.Optional[exp.Expression]]: 1238 """ 1239 Parses a list of tokens and returns a list of syntax trees, one tree 1240 per parsed SQL statement. 1241 1242 Args: 1243 raw_tokens: The list of tokens. 1244 sql: The original SQL string, used to produce helpful debug messages. 1245 1246 Returns: 1247 The list of the produced syntax trees. 1248 """ 1249 return self._parse( 1250 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1251 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
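A minimal end-to-end sketch of parse(), pairing it with the Tokenizer; passing the original sql string is optional but improves error messages:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql)
print(len(trees))      # 2 - one syntax tree per statement
print(trees[0].sql())  # SELECT 1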
1253 def parse_into( 1254 self, 1255 expression_types: exp.IntoType, 1256 raw_tokens: t.List[Token], 1257 sql: t.Optional[str] = None, 1258 ) -> t.List[t.Optional[exp.Expression]]: 1259 """ 1260 Parses a list of tokens into a given Expression type. If a collection of Expression 1261 types is given instead, this method will try to parse the token list into each one 1262 of them, stopping at the first for which the parsing succeeds. 1263 1264 Args: 1265 expression_types: The expression type(s) to try and parse the token list into. 1266 raw_tokens: The list of tokens. 1267 sql: The original SQL string, used to produce helpful debug messages. 1268 1269 Returns: 1270 The target Expression. 1271 """ 1272 errors = [] 1273 for expression_type in ensure_list(expression_types): 1274 parser = self.EXPRESSION_PARSERS.get(expression_type) 1275 if not parser: 1276 raise TypeError(f"No parser registered for {expression_type}") 1277 1278 try: 1279 return self._parse(parser, raw_tokens, sql) 1280 except ParseError as e: 1281 e.errors[0]["into_expression"] = expression_type 1282 errors.append(e) 1283 1284 raise ParseError( 1285 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1286 errors=merge_errors(errors), 1287 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
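A sketch of parse_into() with exp.Select; on failure, the attempted target type is recorded on the error, as the implementation above shows:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tree = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
print(type(tree))  # <class 'sqlglot.expressions.Select'>

bad = "UPDATE t SET x = 1"  # not parseable as a Select
try:
    Parser().parse_into(exp.Select, Tokenizer().tokenize(bad), bad)
except ParseError as e:
    print(e.errors[0]["into_expression"])  # exp.Select, attached by parse_into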
1327 def check_errors(self) -> None: 1328 """Logs or raises any found errors, depending on the chosen error level setting.""" 1329 if self.error_level == ErrorLevel.WARN: 1330 for error in self.errors: 1331 logger.error(str(error)) 1332 elif self.error_level == ErrorLevel.RAISE and self.errors: 1333 raise ParseError( 1334 concat_messages(self.errors, self.max_errors), 1335 errors=merge_errors(self.errors), 1336 )
Logs or raises any errors that were found, depending on the chosen error level setting.
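check_errors is invoked internally after each statement is parsed; with ErrorLevel.WARN the same machinery logs through the module-level "sqlglot" logger instead of raising. A sketch:

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()
sql = "SELECT 1 +"  # malformed on purpose
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # logs the error and still returns a best-effort tree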
1338 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1339 """ 1340 Appends an error in the list of recorded errors or raises it, depending on the chosen 1341 error level setting. 1342 """ 1343 token = token or self._curr or self._prev or Token.string("") 1344 start = token.start 1345 end = token.end + 1 1346 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1347 highlight = self.sql[start:end] 1348 end_context = self.sql[end : end + self.error_message_context] 1349 1350 error = ParseError.new( 1351 f"{message}. Line {token.line}, Col: {token.col}.\n" 1352 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1353 description=message, 1354 line=token.line, 1355 col=token.col, 1356 start_context=start_context, 1357 highlight=highlight, 1358 end_context=end_context, 1359 ) 1360 1361 if self.error_level == ErrorLevel.IMMEDIATE: 1362 raise error 1363 1364 self.errors.append(error)
Appends an error to the list of recorded errors, or raises it, depending on the chosen error level setting.
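The structured fields that raise_error packs into ParseError can be inspected on the caught exception; a sketch under the default ErrorLevel.IMMEDIATE:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)  # IMMEDIATE raises at the first error
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["highlight"])  # location plus the offending slice of SQL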
1366 def expression( 1367 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1368 ) -> E: 1369 """ 1370 Creates a new, validated Expression. 1371 1372 Args: 1373 exp_class: The expression class to instantiate. 1374 comments: An optional list of comments to attach to the expression. 1375 kwargs: The arguments to set for the expression along with their respective values. 1376 1377 Returns: 1378 The target expression. 1379 """ 1380 instance = exp_class(**kwargs) 1381 instance.add_comments(comments) if comments else self._add_comments(instance) 1382 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
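expression() is intended to be called from inside _parse_* methods, where it also picks up pending comments. A hypothetical extension point as a sketch; the FOO keyword and _parse_foo helper are invented for illustration and are not part of sqlglot:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_foo(self):  # hypothetical helper, not part of sqlglot
        if not self._match_text_seq("FOO"):
            return None
        # expression() attaches pending comments and validates mandatory arguments
        return self.expression(exp.Var, this="FOO")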
1389 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1390 """ 1391 Validates an Expression, making sure that all its mandatory arguments are set. 1392 1393 Args: 1394 expression: The expression to validate. 1395 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1396 1397 Returns: 1398 The validated expression. 1399 """ 1400 if self.error_level != ErrorLevel.IGNORE: 1401 for error_message in expression.error_messages(args): 1402 self.raise_error(error_message) 1403 1404 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
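validate_expression can also be exercised directly; under the default ErrorLevel.IMMEDIATE, a missing mandatory argument raises straight away:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE
try:
    parser.validate_expression(exp.EQ(this=exp.column("x")))  # the 'expression' arg is missing
except ParseError as e:
    print(e.errors[0]["description"])  # names the missing keyword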