sqlglot.parser
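The Parser consumes the token stream produced by the Tokenizer and builds one syntax tree per SQL statement. It is usually driven through a Dialect rather than instantiated directly. A minimal usage sketch, assuming only the public sqlglot API (the "duckdb" dialect name is an arbitrary example):

    import sqlglot
    from sqlglot.dialects import Dialect

    sql = "SELECT a + 1 AS b FROM t"

    # High-level entry point: tokenizes and parses in one step
    ast = sqlglot.parse_one(sql)

    # Driving the Parser manually mirrors what parse_one does internally
    dialect = Dialect.get_or_raise("duckdb")
    tokens = dialect.tokenizer.tokenize(sql)
    expressions = dialect.parser().parse(tokens, sql)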
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
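
# A minimal sketch of how the builders above surface through the public API.
# The sqlglot.parse_one entry point and the "postgres" dialect name are
# assumptions about the surrounding package, not part of this module:
#
#     import sqlglot
#
#     # build_mod parenthesizes binary operands: MOD(a + 1, 7) -> (a + 1) % 7
#     sqlglot.parse_one("MOD(a + 1, 7)").sql()
#
#     # build_logarithm normalizes LOG's argument order using the dialect's
#     # LOG_BASE_FIRST setting
#     sqlglot.parse_one("LOG(2, 8)", read="postgres").sql()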

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
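
    # Tokens that can denote a data type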
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
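
    # Tokens that can introduce a table index hint, e.g. MySQL's FORCE/IGNORE/USE INDEX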
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
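
    # Example (sketch, via the public API rather than these methods directly):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     sqlglot.parse("SELECT 1; SELECT 2")          # one tree per statement, via parse()
    #     sqlglot.parse_one("db.tbl", into=exp.Table)  # routed through parse_into()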
1270 """ 1271 errors = [] 1272 for expression_type in ensure_list(expression_types): 1273 parser = self.EXPRESSION_PARSERS.get(expression_type) 1274 if not parser: 1275 raise TypeError(f"No parser registered for {expression_type}") 1276 1277 try: 1278 return self._parse(parser, raw_tokens, sql) 1279 except ParseError as e: 1280 e.errors[0]["into_expression"] = expression_type 1281 errors.append(e) 1282 1283 raise ParseError( 1284 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1285 errors=merge_errors(errors), 1286 ) from errors[-1] 1287 1288 def _parse( 1289 self, 1290 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1291 raw_tokens: t.List[Token], 1292 sql: t.Optional[str] = None, 1293 ) -> t.List[t.Optional[exp.Expression]]: 1294 self.reset() 1295 self.sql = sql or "" 1296 1297 total = len(raw_tokens) 1298 chunks: t.List[t.List[Token]] = [[]] 1299 1300 for i, token in enumerate(raw_tokens): 1301 if token.token_type == TokenType.SEMICOLON: 1302 if token.comments: 1303 chunks.append([token]) 1304 1305 if i < total - 1: 1306 chunks.append([]) 1307 else: 1308 chunks[-1].append(token) 1309 1310 expressions = [] 1311 1312 for tokens in chunks: 1313 self._index = -1 1314 self._tokens = tokens 1315 self._advance() 1316 1317 expressions.append(parse_method(self)) 1318 1319 if self._index < len(self._tokens): 1320 self.raise_error("Invalid expression / Unexpected token") 1321 1322 self.check_errors() 1323 1324 return expressions 1325 1326 def check_errors(self) -> None: 1327 """Logs or raises any found errors, depending on the chosen error level setting.""" 1328 if self.error_level == ErrorLevel.WARN: 1329 for error in self.errors: 1330 logger.error(str(error)) 1331 elif self.error_level == ErrorLevel.RAISE and self.errors: 1332 raise ParseError( 1333 concat_messages(self.errors, self.max_errors), 1334 errors=merge_errors(self.errors), 1335 ) 1336 1337 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1338 """ 1339 Appends an error in the list of recorded errors or raises it, depending on the chosen 1340 error level setting. 1341 """ 1342 token = token or self._curr or self._prev or Token.string("") 1343 start = token.start 1344 end = token.end + 1 1345 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1346 highlight = self.sql[start:end] 1347 end_context = self.sql[end : end + self.error_message_context] 1348 1349 error = ParseError.new( 1350 f"{message}. Line {token.line}, Col: {token.col}.\n" 1351 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1352 description=message, 1353 line=token.line, 1354 col=token.col, 1355 start_context=start_context, 1356 highlight=highlight, 1357 end_context=end_context, 1358 ) 1359 1360 if self.error_level == ErrorLevel.IMMEDIATE: 1361 raise error 1362 1363 self.errors.append(error) 1364 1365 def expression( 1366 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1367 ) -> E: 1368 """ 1369 Creates a new, validated Expression. 1370 1371 Args: 1372 exp_class: The expression class to instantiate. 1373 comments: An optional list of comments to attach to the expression. 1374 kwargs: The arguments to set for the expression along with their respective values. 1375 1376 Returns: 1377 The target expression. 
1378 """ 1379 instance = exp_class(**kwargs) 1380 instance.add_comments(comments) if comments else self._add_comments(instance) 1381 return self.validate_expression(instance) 1382 1383 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1384 if expression and self._prev_comments: 1385 expression.add_comments(self._prev_comments) 1386 self._prev_comments = None 1387 1388 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1389 """ 1390 Validates an Expression, making sure that all its mandatory arguments are set. 1391 1392 Args: 1393 expression: The expression to validate. 1394 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1395 1396 Returns: 1397 The validated expression. 1398 """ 1399 if self.error_level != ErrorLevel.IGNORE: 1400 for error_message in expression.error_messages(args): 1401 self.raise_error(error_message) 1402 1403 return expression 1404 1405 def _find_sql(self, start: Token, end: Token) -> str: 1406 return self.sql[start.start : end.end + 1] 1407 1408 def _is_connected(self) -> bool: 1409 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1410 1411 def _advance(self, times: int = 1) -> None: 1412 self._index += times 1413 self._curr = seq_get(self._tokens, self._index) 1414 self._next = seq_get(self._tokens, self._index + 1) 1415 1416 if self._index > 0: 1417 self._prev = self._tokens[self._index - 1] 1418 self._prev_comments = self._prev.comments 1419 else: 1420 self._prev = None 1421 self._prev_comments = None 1422 1423 def _retreat(self, index: int) -> None: 1424 if index != self._index: 1425 self._advance(index - self._index) 1426 1427 def _warn_unsupported(self) -> None: 1428 if len(self._tokens) <= 1: 1429 return 1430 1431 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1432 # interested in emitting a warning for the one being currently processed. 1433 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1434 1435 logger.warning( 1436 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1437 ) 1438 1439 def _parse_command(self) -> exp.Command: 1440 self._warn_unsupported() 1441 return self.expression( 1442 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1443 ) 1444 1445 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1446 """ 1447 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1448 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1449 the parser state accordingly 1450 """ 1451 index = self._index 1452 error_level = self.error_level 1453 1454 self.error_level = ErrorLevel.IMMEDIATE 1455 try: 1456 this = parse_method() 1457 except ParseError: 1458 this = None 1459 finally: 1460 if not this or retreat: 1461 self._retreat(index) 1462 self.error_level = error_level 1463 1464 return this 1465 1466 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1467 start = self._prev 1468 exists = self._parse_exists() if allow_exists else None 1469 1470 self._match(TokenType.ON) 1471 1472 materialized = self._match_text_seq("MATERIALIZED") 1473 kind = self._match_set(self.CREATABLES) and self._prev 1474 if not kind: 1475 return self._parse_as_command(start) 1476 1477 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1478 this = self._parse_user_defined_function(kind=kind.token_type) 1479 elif kind.token_type == TokenType.TABLE: 1480 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1481 elif kind.token_type == TokenType.COLUMN: 1482 this = self._parse_column() 1483 else: 1484 this = self._parse_id_var() 1485 1486 self._match(TokenType.IS) 1487 1488 return self.expression( 1489 exp.Comment, 1490 this=this, 1491 kind=kind.text, 1492 expression=self._parse_string(), 1493 exists=exists, 1494 materialized=materialized, 1495 ) 1496 1497 def _parse_to_table( 1498 self, 1499 ) -> exp.ToTableProperty: 1500 table = self._parse_table_parts(schema=True) 1501 return self.expression(exp.ToTableProperty, this=table) 1502 1503 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1504 def _parse_ttl(self) -> exp.Expression: 1505 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1506 this = self._parse_bitwise() 1507 1508 if self._match_text_seq("DELETE"): 1509 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1510 if self._match_text_seq("RECOMPRESS"): 1511 return self.expression( 1512 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1513 ) 1514 if self._match_text_seq("TO", "DISK"): 1515 return self.expression( 1516 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1517 ) 1518 if self._match_text_seq("TO", "VOLUME"): 1519 return self.expression( 1520 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1521 ) 1522 1523 return this 1524 1525 expressions = self._parse_csv(_parse_ttl_action) 1526 where = self._parse_where() 1527 group = self._parse_group() 1528 1529 aggregates = None 1530 if group and self._match(TokenType.SET): 1531 aggregates = self._parse_csv(self._parse_set_item) 1532 1533 return self.expression( 1534 exp.MergeTreeTTL, 1535 expressions=expressions, 1536 where=where, 1537 group=group, 1538 aggregates=aggregates, 1539 ) 1540 1541 def _parse_statement(self) -> t.Optional[exp.Expression]: 1542 if self._curr is None: 1543 return None 1544 1545 if self._match_set(self.STATEMENT_PARSERS): 1546 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1547 1548 if self._match_set(self.dialect.tokenizer.COMMANDS): 1549 return self._parse_command() 1550 1551 expression = self._parse_expression() 1552 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1553 return self._parse_query_modifiers(expression) 1554 1555 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1556 start = 
self._prev 1557 temporary = self._match(TokenType.TEMPORARY) 1558 materialized = self._match_text_seq("MATERIALIZED") 1559 1560 kind = self._match_set(self.CREATABLES) and self._prev.text 1561 if not kind: 1562 return self._parse_as_command(start) 1563 1564 if_exists = exists or self._parse_exists() 1565 table = self._parse_table_parts( 1566 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1567 ) 1568 1569 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1570 1571 if self._match(TokenType.L_PAREN, advance=False): 1572 expressions = self._parse_wrapped_csv(self._parse_types) 1573 else: 1574 expressions = None 1575 1576 return self.expression( 1577 exp.Drop, 1578 comments=start.comments, 1579 exists=if_exists, 1580 this=table, 1581 expressions=expressions, 1582 kind=kind.upper(), 1583 temporary=temporary, 1584 materialized=materialized, 1585 cascade=self._match_text_seq("CASCADE"), 1586 constraints=self._match_text_seq("CONSTRAINTS"), 1587 purge=self._match_text_seq("PURGE"), 1588 cluster=cluster, 1589 ) 1590 1591 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1592 return ( 1593 self._match_text_seq("IF") 1594 and (not not_ or self._match(TokenType.NOT)) 1595 and self._match(TokenType.EXISTS) 1596 ) 1597 1598 def _parse_create(self) -> exp.Create | exp.Command: 1599 # Note: this can't be None because we've matched a statement parser 1600 start = self._prev 1601 comments = self._prev_comments 1602 1603 replace = ( 1604 start.token_type == TokenType.REPLACE 1605 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1606 or self._match_pair(TokenType.OR, TokenType.ALTER) 1607 ) 1608 1609 unique = self._match(TokenType.UNIQUE) 1610 1611 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1612 self._advance() 1613 1614 properties = None 1615 create_token = self._match_set(self.CREATABLES) and self._prev 1616 1617 if not create_token: 1618 # exp.Properties.Location.POST_CREATE 1619 properties = self._parse_properties() 1620 create_token = self._match_set(self.CREATABLES) and self._prev 1621 1622 if not properties or not create_token: 1623 return self._parse_as_command(start) 1624 1625 exists = self._parse_exists(not_=True) 1626 this = None 1627 expression: t.Optional[exp.Expression] = None 1628 indexes = None 1629 no_schema_binding = None 1630 begin = None 1631 end = None 1632 clone = None 1633 1634 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1635 nonlocal properties 1636 if properties and temp_props: 1637 properties.expressions.extend(temp_props.expressions) 1638 elif temp_props: 1639 properties = temp_props 1640 1641 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1642 this = self._parse_user_defined_function(kind=create_token.token_type) 1643 1644 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1645 extend_props(self._parse_properties()) 1646 1647 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1648 extend_props(self._parse_properties()) 1649 1650 if not expression: 1651 if self._match(TokenType.COMMAND): 1652 expression = self._parse_as_command(self._prev) 1653 else: 1654 begin = self._match(TokenType.BEGIN) 1655 return_ = self._match_text_seq("RETURN") 1656 1657 if self._match(TokenType.STRING, advance=False): 1658 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1659 # # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1660 expression = self._parse_string() 1661 extend_props(self._parse_properties()) 1662 else: 1663 expression = self._parse_statement() 1664 1665 end = self._match_text_seq("END") 1666 1667 if return_: 1668 expression = self.expression(exp.Return, this=expression) 1669 elif create_token.token_type == TokenType.INDEX: 1670 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1671 if not self._match(TokenType.ON): 1672 index = self._parse_id_var() 1673 anonymous = False 1674 else: 1675 index = None 1676 anonymous = True 1677 1678 this = self._parse_index(index=index, anonymous=anonymous) 1679 elif create_token.token_type in self.DB_CREATABLES: 1680 table_parts = self._parse_table_parts( 1681 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1682 ) 1683 1684 # exp.Properties.Location.POST_NAME 1685 self._match(TokenType.COMMA) 1686 extend_props(self._parse_properties(before=True)) 1687 1688 this = self._parse_schema(this=table_parts) 1689 1690 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1691 extend_props(self._parse_properties()) 1692 1693 self._match(TokenType.ALIAS) 1694 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1695 # exp.Properties.Location.POST_ALIAS 1696 extend_props(self._parse_properties()) 1697 1698 if create_token.token_type == TokenType.SEQUENCE: 1699 expression = self._parse_types() 1700 extend_props(self._parse_properties()) 1701 else: 1702 expression = self._parse_ddl_select() 1703 1704 if create_token.token_type == TokenType.TABLE: 1705 # exp.Properties.Location.POST_EXPRESSION 1706 extend_props(self._parse_properties()) 1707 1708 indexes = [] 1709 while True: 1710 index = self._parse_index() 1711 1712 # exp.Properties.Location.POST_INDEX 1713 extend_props(self._parse_properties()) 1714 1715 if not index: 1716 break 1717 else: 1718 self._match(TokenType.COMMA) 1719 indexes.append(index) 1720 elif create_token.token_type == TokenType.VIEW: 1721 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1722 no_schema_binding = True 1723 1724 shallow = self._match_text_seq("SHALLOW") 1725 1726 if self._match_texts(self.CLONE_KEYWORDS): 1727 copy = self._prev.text.lower() == "copy" 1728 clone = self.expression( 1729 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1730 ) 1731 1732 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1733 return self._parse_as_command(start) 1734 1735 return self.expression( 1736 exp.Create, 1737 comments=comments, 1738 this=this, 1739 kind=create_token.text.upper(), 1740 replace=replace, 1741 unique=unique, 1742 expression=expression, 1743 exists=exists, 1744 properties=properties, 1745 indexes=indexes, 1746 no_schema_binding=no_schema_binding, 1747 begin=begin, 1748 end=end, 1749 clone=clone, 1750 ) 1751 1752 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1753 seq = exp.SequenceProperties() 1754 1755 options = [] 1756 index = self._index 1757 1758 while self._curr: 1759 self._match(TokenType.COMMA) 1760 if self._match_text_seq("INCREMENT"): 1761 self._match_text_seq("BY") 1762 self._match_text_seq("=") 1763 seq.set("increment", self._parse_term()) 1764 elif self._match_text_seq("MINVALUE"): 1765 seq.set("minvalue", self._parse_term()) 1766 elif self._match_text_seq("MAXVALUE"): 1767 seq.set("maxvalue", self._parse_term()) 1768 elif self._match(TokenType.START_WITH) or 
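# Illustration: `_parse_create` dispatches on the creatable kind (FUNCTION,
# INDEX, TABLE, VIEW, ...) and gathers properties from several syntactic
# positions into one exp.Create node. A minimal sketch:
#
#     from sqlglot import exp, parse_one
#     create = parse_one("CREATE TABLE IF NOT EXISTS t (x INT)")
#     create.args["kind"]                  # 'TABLE'
#     create.args["exists"]                # True
#     isinstance(create.this, exp.Schema)  # True: table name plus column defs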
self._match_text_seq("START"): 1769 self._match_text_seq("=") 1770 seq.set("start", self._parse_term()) 1771 elif self._match_text_seq("CACHE"): 1772 # T-SQL allows empty CACHE which is initialized dynamically 1773 seq.set("cache", self._parse_number() or True) 1774 elif self._match_text_seq("OWNED", "BY"): 1775 # "OWNED BY NONE" is the default 1776 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1777 else: 1778 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1779 if opt: 1780 options.append(opt) 1781 else: 1782 break 1783 1784 seq.set("options", options if options else None) 1785 return None if self._index == index else seq 1786 1787 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1788 # only used for teradata currently 1789 self._match(TokenType.COMMA) 1790 1791 kwargs = { 1792 "no": self._match_text_seq("NO"), 1793 "dual": self._match_text_seq("DUAL"), 1794 "before": self._match_text_seq("BEFORE"), 1795 "default": self._match_text_seq("DEFAULT"), 1796 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1797 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1798 "after": self._match_text_seq("AFTER"), 1799 "minimum": self._match_texts(("MIN", "MINIMUM")), 1800 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1801 } 1802 1803 if self._match_texts(self.PROPERTY_PARSERS): 1804 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1805 try: 1806 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1807 except TypeError: 1808 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1809 1810 return None 1811 1812 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1813 return self._parse_wrapped_csv(self._parse_property) 1814 1815 def _parse_property(self) -> t.Optional[exp.Expression]: 1816 if self._match_texts(self.PROPERTY_PARSERS): 1817 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1818 1819 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1820 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1821 1822 if self._match_text_seq("COMPOUND", "SORTKEY"): 1823 return self._parse_sortkey(compound=True) 1824 1825 if self._match_text_seq("SQL", "SECURITY"): 1826 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1827 1828 index = self._index 1829 key = self._parse_column() 1830 1831 if not self._match(TokenType.EQ): 1832 self._retreat(index) 1833 return self._parse_sequence_properties() 1834 1835 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1836 if isinstance(key, exp.Column): 1837 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1838 1839 value = self._parse_bitwise() or self._parse_var(any_token=True) 1840 1841 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1842 if isinstance(value, exp.Column): 1843 value = exp.var(value.name) 1844 1845 return self.expression(exp.Property, this=key, value=value) 1846 1847 def _parse_stored(self) -> exp.FileFormatProperty: 1848 self._match(TokenType.ALIAS) 1849 1850 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1851 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1852 1853 return self.expression( 1854 exp.FileFormatProperty, 1855 this=( 1856 self.expression( 1857 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1858 ) 1859 if 
input_format or output_format 1860 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1861 ), 1862 ) 1863 1864 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1865 field = self._parse_field() 1866 if isinstance(field, exp.Identifier) and not field.quoted: 1867 field = exp.var(field) 1868 1869 return field 1870 1871 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1872 self._match(TokenType.EQ) 1873 self._match(TokenType.ALIAS) 1874 1875 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1876 1877 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1878 properties = [] 1879 while True: 1880 if before: 1881 prop = self._parse_property_before() 1882 else: 1883 prop = self._parse_property() 1884 if not prop: 1885 break 1886 for p in ensure_list(prop): 1887 properties.append(p) 1888 1889 if properties: 1890 return self.expression(exp.Properties, expressions=properties) 1891 1892 return None 1893 1894 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1895 return self.expression( 1896 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1897 ) 1898 1899 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1900 if self._index >= 2: 1901 pre_volatile_token = self._tokens[self._index - 2] 1902 else: 1903 pre_volatile_token = None 1904 1905 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1906 return exp.VolatileProperty() 1907 1908 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1909 1910 def _parse_retention_period(self) -> exp.Var: 1911 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1912 number = self._parse_number() 1913 number_str = f"{number} " if number else "" 1914 unit = self._parse_var(any_token=True) 1915 return exp.var(f"{number_str}{unit}") 1916 1917 def _parse_system_versioning_property( 1918 self, with_: bool = False 1919 ) -> exp.WithSystemVersioningProperty: 1920 self._match(TokenType.EQ) 1921 prop = self.expression( 1922 exp.WithSystemVersioningProperty, 1923 **{ # type: ignore 1924 "on": True, 1925 "with": with_, 1926 }, 1927 ) 1928 1929 if self._match_text_seq("OFF"): 1930 prop.set("on", False) 1931 return prop 1932 1933 self._match(TokenType.ON) 1934 if self._match(TokenType.L_PAREN): 1935 while self._curr and not self._match(TokenType.R_PAREN): 1936 if self._match_text_seq("HISTORY_TABLE", "="): 1937 prop.set("this", self._parse_table_parts()) 1938 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1939 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1940 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1941 prop.set("retention_period", self._parse_retention_period()) 1942 1943 self._match(TokenType.COMMA) 1944 1945 return prop 1946 1947 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1948 self._match(TokenType.EQ) 1949 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1950 prop = self.expression(exp.DataDeletionProperty, on=on) 1951 1952 if self._match(TokenType.L_PAREN): 1953 while self._curr and not self._match(TokenType.R_PAREN): 1954 if self._match_text_seq("FILTER_COLUMN", "="): 1955 prop.set("filter_column", self._parse_column()) 1956 elif self._match_text_seq("RETENTION_PERIOD", "="): 1957 prop.set("retention_period", self._parse_retention_period()) 1958 1959 
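# Illustration: `_parse_sequence_properties` scans INCREMENT / MINVALUE /
# CACHE / OWNED BY options into an exp.SequenceProperties node. A sketch,
# assuming the active dialect treats SEQUENCE as a creatable and routes its
# options through this method:
#
#     from sqlglot import parse_one
#     seq = parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
#     seq.args["kind"]  # 'SEQUENCE'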
self._match(TokenType.COMMA) 1960 1961 return prop 1962 1963 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1964 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1965 prop = self._parse_system_versioning_property(with_=True) 1966 self._match_r_paren() 1967 return prop 1968 1969 if self._match(TokenType.L_PAREN, advance=False): 1970 return self._parse_wrapped_properties() 1971 1972 if self._match_text_seq("JOURNAL"): 1973 return self._parse_withjournaltable() 1974 1975 if self._match_texts(self.VIEW_ATTRIBUTES): 1976 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1977 1978 if self._match_text_seq("DATA"): 1979 return self._parse_withdata(no=False) 1980 elif self._match_text_seq("NO", "DATA"): 1981 return self._parse_withdata(no=True) 1982 1983 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1984 return self._parse_serde_properties(with_=True) 1985 1986 if not self._next: 1987 return None 1988 1989 return self._parse_withisolatedloading() 1990 1991 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1992 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1993 self._match(TokenType.EQ) 1994 1995 user = self._parse_id_var() 1996 self._match(TokenType.PARAMETER) 1997 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1998 1999 if not user or not host: 2000 return None 2001 2002 return exp.DefinerProperty(this=f"{user}@{host}") 2003 2004 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2005 self._match(TokenType.TABLE) 2006 self._match(TokenType.EQ) 2007 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2008 2009 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2010 return self.expression(exp.LogProperty, no=no) 2011 2012 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2013 return self.expression(exp.JournalProperty, **kwargs) 2014 2015 def _parse_checksum(self) -> exp.ChecksumProperty: 2016 self._match(TokenType.EQ) 2017 2018 on = None 2019 if self._match(TokenType.ON): 2020 on = True 2021 elif self._match_text_seq("OFF"): 2022 on = False 2023 2024 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2025 2026 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2027 return self.expression( 2028 exp.Cluster, 2029 expressions=( 2030 self._parse_wrapped_csv(self._parse_ordered) 2031 if wrapped 2032 else self._parse_csv(self._parse_ordered) 2033 ), 2034 ) 2035 2036 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2037 self._match_text_seq("BY") 2038 2039 self._match_l_paren() 2040 expressions = self._parse_csv(self._parse_column) 2041 self._match_r_paren() 2042 2043 if self._match_text_seq("SORTED", "BY"): 2044 self._match_l_paren() 2045 sorted_by = self._parse_csv(self._parse_ordered) 2046 self._match_r_paren() 2047 else: 2048 sorted_by = None 2049 2050 self._match(TokenType.INTO) 2051 buckets = self._parse_number() 2052 self._match_text_seq("BUCKETS") 2053 2054 return self.expression( 2055 exp.ClusteredByProperty, 2056 expressions=expressions, 2057 sorted_by=sorted_by, 2058 buckets=buckets, 2059 ) 2060 2061 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2062 if not self._match_text_seq("GRANTS"): 2063 self._retreat(self._index - 1) 2064 return None 2065 2066 return self.expression(exp.CopyGrantsProperty) 2067 2068 def _parse_freespace(self) -> exp.FreespaceProperty: 2069 self._match(TokenType.EQ) 2070 return 
self.expression( 2071 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2072 ) 2073 2074 def _parse_mergeblockratio( 2075 self, no: bool = False, default: bool = False 2076 ) -> exp.MergeBlockRatioProperty: 2077 if self._match(TokenType.EQ): 2078 return self.expression( 2079 exp.MergeBlockRatioProperty, 2080 this=self._parse_number(), 2081 percent=self._match(TokenType.PERCENT), 2082 ) 2083 2084 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2085 2086 def _parse_datablocksize( 2087 self, 2088 default: t.Optional[bool] = None, 2089 minimum: t.Optional[bool] = None, 2090 maximum: t.Optional[bool] = None, 2091 ) -> exp.DataBlocksizeProperty: 2092 self._match(TokenType.EQ) 2093 size = self._parse_number() 2094 2095 units = None 2096 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2097 units = self._prev.text 2098 2099 return self.expression( 2100 exp.DataBlocksizeProperty, 2101 size=size, 2102 units=units, 2103 default=default, 2104 minimum=minimum, 2105 maximum=maximum, 2106 ) 2107 2108 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2109 self._match(TokenType.EQ) 2110 always = self._match_text_seq("ALWAYS") 2111 manual = self._match_text_seq("MANUAL") 2112 never = self._match_text_seq("NEVER") 2113 default = self._match_text_seq("DEFAULT") 2114 2115 autotemp = None 2116 if self._match_text_seq("AUTOTEMP"): 2117 autotemp = self._parse_schema() 2118 2119 return self.expression( 2120 exp.BlockCompressionProperty, 2121 always=always, 2122 manual=manual, 2123 never=never, 2124 default=default, 2125 autotemp=autotemp, 2126 ) 2127 2128 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2129 index = self._index 2130 no = self._match_text_seq("NO") 2131 concurrent = self._match_text_seq("CONCURRENT") 2132 2133 if not self._match_text_seq("ISOLATED", "LOADING"): 2134 self._retreat(index) 2135 return None 2136 2137 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2138 return self.expression( 2139 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2140 ) 2141 2142 def _parse_locking(self) -> exp.LockingProperty: 2143 if self._match(TokenType.TABLE): 2144 kind = "TABLE" 2145 elif self._match(TokenType.VIEW): 2146 kind = "VIEW" 2147 elif self._match(TokenType.ROW): 2148 kind = "ROW" 2149 elif self._match_text_seq("DATABASE"): 2150 kind = "DATABASE" 2151 else: 2152 kind = None 2153 2154 if kind in ("DATABASE", "TABLE", "VIEW"): 2155 this = self._parse_table_parts() 2156 else: 2157 this = None 2158 2159 if self._match(TokenType.FOR): 2160 for_or_in = "FOR" 2161 elif self._match(TokenType.IN): 2162 for_or_in = "IN" 2163 else: 2164 for_or_in = None 2165 2166 if self._match_text_seq("ACCESS"): 2167 lock_type = "ACCESS" 2168 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2169 lock_type = "EXCLUSIVE" 2170 elif self._match_text_seq("SHARE"): 2171 lock_type = "SHARE" 2172 elif self._match_text_seq("READ"): 2173 lock_type = "READ" 2174 elif self._match_text_seq("WRITE"): 2175 lock_type = "WRITE" 2176 elif self._match_text_seq("CHECKSUM"): 2177 lock_type = "CHECKSUM" 2178 else: 2179 lock_type = None 2180 2181 override = self._match_text_seq("OVERRIDE") 2182 2183 return self.expression( 2184 exp.LockingProperty, 2185 this=this, 2186 kind=kind, 2187 for_or_in=for_or_in, 2188 lock_type=lock_type, 2189 override=override, 2190 ) 2191 2192 def _parse_partition_by(self) -> t.List[exp.Expression]: 2193 if 
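# Illustration: property parsers such as `_parse_copy_property` and
# `_parse_withisolatedloading` share a save-index/retreat idiom: remember the
# token position, try to match a keyword sequence, and hand the tokens back on
# failure. A self-contained toy version of the idiom (hypothetical
# mini-parser, not sqlglot internals):
#
#     class MiniParser:
#         def __init__(self, tokens):
#             self.tokens, self.i = tokens, 0
#
#         def match(self, text):
#             if self.i < len(self.tokens) and self.tokens[self.i] == text:
#                 self.i += 1
#                 return True
#             return False
#
#         def parse_isolated_loading(self):
#             start = self.i  # remember the position
#             no = self.match("NO")
#             if not (self.match("ISOLATED") and self.match("LOADING")):
#                 self.i = start  # retreat: give the tokens back
#                 return None
#             return ("ISOLATED LOADING", no)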
self._match(TokenType.PARTITION_BY): 2194 return self._parse_csv(self._parse_conjunction) 2195 return [] 2196 2197 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2198 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2199 if self._match_text_seq("MINVALUE"): 2200 return exp.var("MINVALUE") 2201 if self._match_text_seq("MAXVALUE"): 2202 return exp.var("MAXVALUE") 2203 return self._parse_bitwise() 2204 2205 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2206 expression = None 2207 from_expressions = None 2208 to_expressions = None 2209 2210 if self._match(TokenType.IN): 2211 this = self._parse_wrapped_csv(self._parse_bitwise) 2212 elif self._match(TokenType.FROM): 2213 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2214 self._match_text_seq("TO") 2215 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2216 elif self._match_text_seq("WITH", "(", "MODULUS"): 2217 this = self._parse_number() 2218 self._match_text_seq(",", "REMAINDER") 2219 expression = self._parse_number() 2220 self._match_r_paren() 2221 else: 2222 self.raise_error("Failed to parse partition bound spec.") 2223 2224 return self.expression( 2225 exp.PartitionBoundSpec, 2226 this=this, 2227 expression=expression, 2228 from_expressions=from_expressions, 2229 to_expressions=to_expressions, 2230 ) 2231 2232 # https://www.postgresql.org/docs/current/sql-createtable.html 2233 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2234 if not self._match_text_seq("OF"): 2235 self._retreat(self._index - 1) 2236 return None 2237 2238 this = self._parse_table(schema=True) 2239 2240 if self._match(TokenType.DEFAULT): 2241 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2242 elif self._match_text_seq("FOR", "VALUES"): 2243 expression = self._parse_partition_bound_spec() 2244 else: 2245 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2246 2247 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2248 2249 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2250 self._match(TokenType.EQ) 2251 return self.expression( 2252 exp.PartitionedByProperty, 2253 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2254 ) 2255 2256 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2257 if self._match_text_seq("AND", "STATISTICS"): 2258 statistics = True 2259 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2260 statistics = False 2261 else: 2262 statistics = None 2263 2264 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2265 2266 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2267 if self._match_text_seq("SQL"): 2268 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2269 return None 2270 2271 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2272 if self._match_text_seq("SQL", "DATA"): 2273 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2274 return None 2275 2276 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2277 if self._match_text_seq("PRIMARY", "INDEX"): 2278 return exp.NoPrimaryIndexProperty() 2279 if self._match_text_seq("SQL"): 2280 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2281 return None 2282 2283 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2284 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2285 return exp.OnCommitProperty() 
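# Illustration: `_parse_partition_bound_spec` covers the Postgres partition
# bound forms (IN (...), FROM (...) TO (...), WITH (MODULUS ..., REMAINDER
# ...)). A sketch, assuming the Postgres dialect wires PARTITION OF through
# these methods:
#
#     from sqlglot import exp, parse_one
#     p = parse_one(
#         "CREATE TABLE m1 PARTITION OF m FOR VALUES FROM (1) TO (10)",
#         read="postgres",
#     )
#     p.find(exp.PartitionBoundSpec)  # holds the FROM/TO expressions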
2286 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2287 return exp.OnCommitProperty(delete=True) 2288 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2289 2290 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2291 if self._match_text_seq("SQL", "DATA"): 2292 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2293 return None 2294 2295 def _parse_distkey(self) -> exp.DistKeyProperty: 2296 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2297 2298 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2299 table = self._parse_table(schema=True) 2300 2301 options = [] 2302 while self._match_texts(("INCLUDING", "EXCLUDING")): 2303 this = self._prev.text.upper() 2304 2305 id_var = self._parse_id_var() 2306 if not id_var: 2307 return None 2308 2309 options.append( 2310 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2311 ) 2312 2313 return self.expression(exp.LikeProperty, this=table, expressions=options) 2314 2315 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2316 return self.expression( 2317 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2318 ) 2319 2320 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2321 self._match(TokenType.EQ) 2322 return self.expression( 2323 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2324 ) 2325 2326 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2327 self._match_text_seq("WITH", "CONNECTION") 2328 return self.expression( 2329 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2330 ) 2331 2332 def _parse_returns(self) -> exp.ReturnsProperty: 2333 value: t.Optional[exp.Expression] 2334 null = None 2335 is_table = self._match(TokenType.TABLE) 2336 2337 if is_table: 2338 if self._match(TokenType.LT): 2339 value = self.expression( 2340 exp.Schema, 2341 this="TABLE", 2342 expressions=self._parse_csv(self._parse_struct_types), 2343 ) 2344 if not self._match(TokenType.GT): 2345 self.raise_error("Expecting >") 2346 else: 2347 value = self._parse_schema(exp.var("TABLE")) 2348 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2349 null = True 2350 value = None 2351 else: 2352 value = self._parse_types() 2353 2354 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2355 2356 def _parse_describe(self) -> exp.Describe: 2357 kind = self._match_set(self.CREATABLES) and self._prev.text 2358 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2359 if self._match(TokenType.DOT): 2360 style = None 2361 self._retreat(self._index - 2) 2362 this = self._parse_table(schema=True) 2363 properties = self._parse_properties() 2364 expressions = properties.expressions if properties else None 2365 return self.expression( 2366 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2367 ) 2368 2369 def _parse_insert(self) -> exp.Insert: 2370 comments = ensure_list(self._prev_comments) 2371 hint = self._parse_hint() 2372 overwrite = self._match(TokenType.OVERWRITE) 2373 ignore = self._match(TokenType.IGNORE) 2374 local = self._match_text_seq("LOCAL") 2375 alternative = None 2376 is_function = None 2377 2378 if self._match_text_seq("DIRECTORY"): 2379 this: t.Optional[exp.Expression] = self.expression( 2380 exp.Directory, 2381 this=self._parse_var_or_string(), 2382 
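# Illustration: `_parse_create_like` yields an exp.LikeProperty for
# CREATE TABLE ... LIKE ..., with optional INCLUDING/EXCLUDING options. A
# sketch (MySQL syntax, assuming that dialect routes LIKE through this
# parser):
#
#     from sqlglot import exp, parse_one
#     c = parse_one("CREATE TABLE t2 LIKE t1", read="mysql")
#     c.find(exp.LikeProperty)  # LikeProperty over table t1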
local=local, 2383 row_format=self._parse_row_format(match_row=True), 2384 ) 2385 else: 2386 if self._match(TokenType.OR): 2387 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2388 2389 self._match(TokenType.INTO) 2390 comments += ensure_list(self._prev_comments) 2391 self._match(TokenType.TABLE) 2392 is_function = self._match(TokenType.FUNCTION) 2393 2394 this = ( 2395 self._parse_table(schema=True, parse_partition=True) 2396 if not is_function 2397 else self._parse_function() 2398 ) 2399 2400 returning = self._parse_returning() 2401 2402 return self.expression( 2403 exp.Insert, 2404 comments=comments, 2405 hint=hint, 2406 is_function=is_function, 2407 this=this, 2408 stored=self._match_text_seq("STORED") and self._parse_stored(), 2409 by_name=self._match_text_seq("BY", "NAME"), 2410 exists=self._parse_exists(), 2411 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2412 and self._parse_conjunction(), 2413 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2414 conflict=self._parse_on_conflict(), 2415 returning=returning or self._parse_returning(), 2416 overwrite=overwrite, 2417 alternative=alternative, 2418 ignore=ignore, 2419 ) 2420 2421 def _parse_kill(self) -> exp.Kill: 2422 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2423 2424 return self.expression( 2425 exp.Kill, 2426 this=self._parse_primary(), 2427 kind=kind, 2428 ) 2429 2430 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2431 conflict = self._match_text_seq("ON", "CONFLICT") 2432 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2433 2434 if not conflict and not duplicate: 2435 return None 2436 2437 conflict_keys = None 2438 constraint = None 2439 2440 if conflict: 2441 if self._match_text_seq("ON", "CONSTRAINT"): 2442 constraint = self._parse_id_var() 2443 elif self._match(TokenType.L_PAREN): 2444 conflict_keys = self._parse_csv(self._parse_id_var) 2445 self._match_r_paren() 2446 2447 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2448 if self._prev.token_type == TokenType.UPDATE: 2449 self._match(TokenType.SET) 2450 expressions = self._parse_csv(self._parse_equality) 2451 else: 2452 expressions = None 2453 2454 return self.expression( 2455 exp.OnConflict, 2456 duplicate=duplicate, 2457 expressions=expressions, 2458 action=action, 2459 conflict_keys=conflict_keys, 2460 constraint=constraint, 2461 ) 2462 2463 def _parse_returning(self) -> t.Optional[exp.Returning]: 2464 if not self._match(TokenType.RETURNING): 2465 return None 2466 return self.expression( 2467 exp.Returning, 2468 expressions=self._parse_csv(self._parse_expression), 2469 into=self._match(TokenType.INTO) and self._parse_table_part(), 2470 ) 2471 2472 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2473 if not self._match(TokenType.FORMAT): 2474 return None 2475 return self._parse_row_format() 2476 2477 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2478 index = self._index 2479 with_ = with_ or self._match_text_seq("WITH") 2480 2481 if not self._match(TokenType.SERDE_PROPERTIES): 2482 self._retreat(index) 2483 return None 2484 return self.expression( 2485 exp.SerdeProperties, 2486 **{ # type: ignore 2487 "expressions": self._parse_wrapped_properties(), 2488 "with": with_, 2489 }, 2490 ) 2491 2492 def _parse_row_format( 2493 self, match_row: bool = False 2494 ) -> t.Optional[exp.RowFormatSerdeProperty | 
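# Illustration: `_parse_on_conflict` recognizes both Postgres ON CONFLICT and
# MySQL ON DUPLICATE KEY clauses and attaches them to the exp.Insert. A
# sketch:
#
#     from sqlglot import exp, parse_one
#     ins = parse_one(
#         "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
#         read="postgres",
#     )
#     isinstance(ins.args["conflict"], exp.OnConflict)  # True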
exp.RowFormatDelimitedProperty]: 2495 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2496 return None 2497 2498 if self._match_text_seq("SERDE"): 2499 this = self._parse_string() 2500 2501 serde_properties = self._parse_serde_properties() 2502 2503 return self.expression( 2504 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2505 ) 2506 2507 self._match_text_seq("DELIMITED") 2508 2509 kwargs = {} 2510 2511 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2512 kwargs["fields"] = self._parse_string() 2513 if self._match_text_seq("ESCAPED", "BY"): 2514 kwargs["escaped"] = self._parse_string() 2515 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2516 kwargs["collection_items"] = self._parse_string() 2517 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2518 kwargs["map_keys"] = self._parse_string() 2519 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2520 kwargs["lines"] = self._parse_string() 2521 if self._match_text_seq("NULL", "DEFINED", "AS"): 2522 kwargs["null"] = self._parse_string() 2523 2524 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2525 2526 def _parse_load(self) -> exp.LoadData | exp.Command: 2527 if self._match_text_seq("DATA"): 2528 local = self._match_text_seq("LOCAL") 2529 self._match_text_seq("INPATH") 2530 inpath = self._parse_string() 2531 overwrite = self._match(TokenType.OVERWRITE) 2532 self._match_pair(TokenType.INTO, TokenType.TABLE) 2533 2534 return self.expression( 2535 exp.LoadData, 2536 this=self._parse_table(schema=True), 2537 local=local, 2538 overwrite=overwrite, 2539 inpath=inpath, 2540 partition=self._parse_partition(), 2541 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2542 serde=self._match_text_seq("SERDE") and self._parse_string(), 2543 ) 2544 return self._parse_as_command(self._prev) 2545 2546 def _parse_delete(self) -> exp.Delete: 2547 # This handles MySQL's "Multiple-Table Syntax" 2548 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2549 tables = None 2550 comments = self._prev_comments 2551 if not self._match(TokenType.FROM, advance=False): 2552 tables = self._parse_csv(self._parse_table) or None 2553 2554 returning = self._parse_returning() 2555 2556 return self.expression( 2557 exp.Delete, 2558 comments=comments, 2559 tables=tables, 2560 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2561 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2562 where=self._parse_where(), 2563 returning=returning or self._parse_returning(), 2564 limit=self._parse_limit(), 2565 ) 2566 2567 def _parse_update(self) -> exp.Update: 2568 comments = self._prev_comments 2569 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2570 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2571 returning = self._parse_returning() 2572 return self.expression( 2573 exp.Update, 2574 comments=comments, 2575 **{ # type: ignore 2576 "this": this, 2577 "expressions": expressions, 2578 "from": self._parse_from(joins=True), 2579 "where": self._parse_where(), 2580 "returning": returning or self._parse_returning(), 2581 "order": self._parse_order(), 2582 "limit": self._parse_limit(), 2583 }, 2584 ) 2585 2586 def _parse_uncache(self) -> exp.Uncache: 2587 if not self._match(TokenType.TABLE): 2588 self.raise_error("Expecting TABLE after UNCACHE") 2589 2590 return self.expression( 2591 exp.Uncache, exists=self._parse_exists(), 
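# Illustration: `_parse_delete` supports MySQL's multiple-table form as well
# as USING, WHERE and RETURNING clauses. A sketch:
#
#     from sqlglot import parse_one
#     d = parse_one("DELETE FROM t WHERE x > 1 RETURNING x", read="postgres")
#     d.args["where"]      # exp.Where
#     d.args["returning"]  # exp.Returning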
this=self._parse_table(schema=True) 2592 ) 2593 2594 def _parse_cache(self) -> exp.Cache: 2595 lazy = self._match_text_seq("LAZY") 2596 self._match(TokenType.TABLE) 2597 table = self._parse_table(schema=True) 2598 2599 options = [] 2600 if self._match_text_seq("OPTIONS"): 2601 self._match_l_paren() 2602 k = self._parse_string() 2603 self._match(TokenType.EQ) 2604 v = self._parse_string() 2605 options = [k, v] 2606 self._match_r_paren() 2607 2608 self._match(TokenType.ALIAS) 2609 return self.expression( 2610 exp.Cache, 2611 this=table, 2612 lazy=lazy, 2613 options=options, 2614 expression=self._parse_select(nested=True), 2615 ) 2616 2617 def _parse_partition(self) -> t.Optional[exp.Partition]: 2618 if not self._match(TokenType.PARTITION): 2619 return None 2620 2621 return self.expression( 2622 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2623 ) 2624 2625 def _parse_value(self) -> t.Optional[exp.Tuple]: 2626 if self._match(TokenType.L_PAREN): 2627 expressions = self._parse_csv(self._parse_expression) 2628 self._match_r_paren() 2629 return self.expression(exp.Tuple, expressions=expressions) 2630 2631 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2632 expression = self._parse_expression() 2633 if expression: 2634 return self.expression(exp.Tuple, expressions=[expression]) 2635 return None 2636 2637 def _parse_projections(self) -> t.List[exp.Expression]: 2638 return self._parse_expressions() 2639 2640 def _parse_select( 2641 self, 2642 nested: bool = False, 2643 table: bool = False, 2644 parse_subquery_alias: bool = True, 2645 parse_set_operation: bool = True, 2646 ) -> t.Optional[exp.Expression]: 2647 cte = self._parse_with() 2648 2649 if cte: 2650 this = self._parse_statement() 2651 2652 if not this: 2653 self.raise_error("Failed to parse any statement following CTE") 2654 return cte 2655 2656 if "with" in this.arg_types: 2657 this.set("with", cte) 2658 else: 2659 self.raise_error(f"{this.key} does not support CTE") 2660 this = cte 2661 2662 return this 2663 2664 # duckdb supports leading with FROM x 2665 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2666 2667 if self._match(TokenType.SELECT): 2668 comments = self._prev_comments 2669 2670 hint = self._parse_hint() 2671 all_ = self._match(TokenType.ALL) 2672 distinct = self._match_set(self.DISTINCT_TOKENS) 2673 2674 kind = ( 2675 self._match(TokenType.ALIAS) 2676 and self._match_texts(("STRUCT", "VALUE")) 2677 and self._prev.text.upper() 2678 ) 2679 2680 if distinct: 2681 distinct = self.expression( 2682 exp.Distinct, 2683 on=self._parse_value() if self._match(TokenType.ON) else None, 2684 ) 2685 2686 if all_ and distinct: 2687 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2688 2689 limit = self._parse_limit(top=True) 2690 projections = self._parse_projections() 2691 2692 this = self.expression( 2693 exp.Select, 2694 kind=kind, 2695 hint=hint, 2696 distinct=distinct, 2697 expressions=projections, 2698 limit=limit, 2699 ) 2700 this.comments = comments 2701 2702 into = self._parse_into() 2703 if into: 2704 this.set("into", into) 2705 2706 if not from_: 2707 from_ = self._parse_from() 2708 2709 if from_: 2710 this.set("from", from_) 2711 2712 this = self._parse_query_modifiers(this) 2713 elif (table or nested) and self._match(TokenType.L_PAREN): 2714 if self._match(TokenType.PIVOT): 2715 this = self._parse_simplified_pivot() 2716 elif self._match(TokenType.FROM): 2717 this = exp.select("*").from_( 2718 t.cast(exp.From, 
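# Illustration: `_parse_select` assembles projections, DISTINCT, INTO, FROM
# and the trailing query modifiers; note the DuckDB-style leading FROM
# handling above. A sketch:
#
#     from sqlglot import parse_one
#     q = parse_one("SELECT DISTINCT a FROM t LIMIT 5")
#     {k for k, v in q.args.items() if v}       # typically {'distinct',
#                                               #  'expressions', 'from', 'limit'}
#     parse_one("FROM t", read="duckdb").sql()  # 'SELECT * FROM t'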
self._parse_from(skip_from_token=True)) 2719 ) 2720 else: 2721 this = ( 2722 self._parse_table() 2723 if table 2724 else self._parse_select(nested=True, parse_set_operation=False) 2725 ) 2726 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2727 2728 self._match_r_paren() 2729 2730 # We return early here so that the UNION isn't attached to the subquery by the 2731 # following call to _parse_set_operations, but instead becomes the parent node 2732 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2733 elif self._match(TokenType.VALUES, advance=False): 2734 this = self._parse_derived_table_values() 2735 elif from_: 2736 this = exp.select("*").from_(from_.this, copy=False) 2737 else: 2738 this = None 2739 2740 if parse_set_operation: 2741 return self._parse_set_operations(this) 2742 return this 2743 2744 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2745 if not skip_with_token and not self._match(TokenType.WITH): 2746 return None 2747 2748 comments = self._prev_comments 2749 recursive = self._match(TokenType.RECURSIVE) 2750 2751 expressions = [] 2752 while True: 2753 expressions.append(self._parse_cte()) 2754 2755 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2756 break 2757 else: 2758 self._match(TokenType.WITH) 2759 2760 return self.expression( 2761 exp.With, comments=comments, expressions=expressions, recursive=recursive 2762 ) 2763 2764 def _parse_cte(self) -> exp.CTE: 2765 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2766 if not alias or not alias.this: 2767 self.raise_error("Expected CTE to have alias") 2768 2769 self._match(TokenType.ALIAS) 2770 2771 if self._match_text_seq("NOT", "MATERIALIZED"): 2772 materialized = False 2773 elif self._match_text_seq("MATERIALIZED"): 2774 materialized = True 2775 else: 2776 materialized = None 2777 2778 return self.expression( 2779 exp.CTE, 2780 this=self._parse_wrapped(self._parse_statement), 2781 alias=alias, 2782 materialized=materialized, 2783 ) 2784 2785 def _parse_table_alias( 2786 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2787 ) -> t.Optional[exp.TableAlias]: 2788 any_token = self._match(TokenType.ALIAS) 2789 alias = ( 2790 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2791 or self._parse_string_as_identifier() 2792 ) 2793 2794 index = self._index 2795 if self._match(TokenType.L_PAREN): 2796 columns = self._parse_csv(self._parse_function_parameter) 2797 self._match_r_paren() if columns else self._retreat(index) 2798 else: 2799 columns = None 2800 2801 if not alias and not columns: 2802 return None 2803 2804 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2805 2806 # We bubble up comments from the Identifier to the TableAlias 2807 if isinstance(alias, exp.Identifier): 2808 table_alias.add_comments(alias.pop_comments()) 2809 2810 return table_alias 2811 2812 def _parse_subquery( 2813 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2814 ) -> t.Optional[exp.Subquery]: 2815 if not this: 2816 return None 2817 2818 return self.expression( 2819 exp.Subquery, 2820 this=this, 2821 pivots=self._parse_pivots(), 2822 alias=self._parse_table_alias() if parse_alias else None, 2823 ) 2824 2825 def _implicit_unnests_to_explicit(self, this: E) -> E: 2826 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2827 2828 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2829 for i, join in 
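# Illustration: `_parse_with` / `_parse_cte` attach an exp.With node to the
# statement that follows it. A sketch:
#
#     from sqlglot import parse_one
#     q = parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
#     with_ = q.args["with"]      # exp.With
#     with_.expressions[0].alias  # 'c'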
enumerate(this.args.get("joins") or []): 2830 table = join.this 2831 normalized_table = table.copy() 2832 normalized_table.meta["maybe_column"] = True 2833 normalized_table = _norm(normalized_table, dialect=self.dialect) 2834 2835 if isinstance(table, exp.Table) and not join.args.get("on"): 2836 if normalized_table.parts[0].name in refs: 2837 table_as_column = table.to_column() 2838 unnest = exp.Unnest(expressions=[table_as_column]) 2839 2840 # Table.to_column creates a parent Alias node that we want to convert to 2841 # a TableAlias and attach to the Unnest, so it matches the parser's output 2842 if isinstance(table.args.get("alias"), exp.TableAlias): 2843 table_as_column.replace(table_as_column.this) 2844 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2845 2846 table.replace(unnest) 2847 2848 refs.add(normalized_table.alias_or_name) 2849 2850 return this 2851 2852 def _parse_query_modifiers( 2853 self, this: t.Optional[exp.Expression] 2854 ) -> t.Optional[exp.Expression]: 2855 if isinstance(this, (exp.Query, exp.Table)): 2856 for join in self._parse_joins(): 2857 this.append("joins", join) 2858 for lateral in iter(self._parse_lateral, None): 2859 this.append("laterals", lateral) 2860 2861 while True: 2862 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2863 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2864 key, expression = parser(self) 2865 2866 if expression: 2867 this.set(key, expression) 2868 if key == "limit": 2869 offset = expression.args.pop("offset", None) 2870 2871 if offset: 2872 offset = exp.Offset(expression=offset) 2873 this.set("offset", offset) 2874 2875 limit_by_expressions = expression.expressions 2876 expression.set("expressions", None) 2877 offset.set("expressions", limit_by_expressions) 2878 continue 2879 break 2880 2881 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2882 this = self._implicit_unnests_to_explicit(this) 2883 2884 return this 2885 2886 def _parse_hint(self) -> t.Optional[exp.Hint]: 2887 if self._match(TokenType.HINT): 2888 hints = [] 2889 for hint in iter( 2890 lambda: self._parse_csv( 2891 lambda: self._parse_function() or self._parse_var(upper=True) 2892 ), 2893 [], 2894 ): 2895 hints.extend(hint) 2896 2897 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2898 self.raise_error("Expected */ after HINT") 2899 2900 return self.expression(exp.Hint, expressions=hints) 2901 2902 return None 2903 2904 def _parse_into(self) -> t.Optional[exp.Into]: 2905 if not self._match(TokenType.INTO): 2906 return None 2907 2908 temp = self._match(TokenType.TEMPORARY) 2909 unlogged = self._match_text_seq("UNLOGGED") 2910 self._match(TokenType.TABLE) 2911 2912 return self.expression( 2913 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2914 ) 2915 2916 def _parse_from( 2917 self, joins: bool = False, skip_from_token: bool = False 2918 ) -> t.Optional[exp.From]: 2919 if not skip_from_token and not self._match(TokenType.FROM): 2920 return None 2921 2922 return self.expression( 2923 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2924 ) 2925 2926 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2927 return self.expression( 2928 exp.MatchRecognizeMeasure, 2929 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2930 this=self._parse_expression(), 2931 ) 2932 2933 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2934 if not self._match(TokenType.MATCH_RECOGNIZE): 
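# Illustration: `_parse_hint` collects optimizer hints from /*+ ... */ blocks
# into an exp.Hint node. A sketch (Spark-style hint, assuming that dialect
# tokenizes the comment as a hint):
#
#     from sqlglot import parse_one
#     q = parse_one("SELECT /*+ REPARTITION(4) */ * FROM t", read="spark")
#     q.args["hint"]  # exp.Hint wrapping REPARTITION(4)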
2935 return None 2936 2937 self._match_l_paren() 2938 2939 partition = self._parse_partition_by() 2940 order = self._parse_order() 2941 2942 measures = ( 2943 self._parse_csv(self._parse_match_recognize_measure) 2944 if self._match_text_seq("MEASURES") 2945 else None 2946 ) 2947 2948 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2949 rows = exp.var("ONE ROW PER MATCH") 2950 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2951 text = "ALL ROWS PER MATCH" 2952 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2953 text += " SHOW EMPTY MATCHES" 2954 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2955 text += " OMIT EMPTY MATCHES" 2956 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2957 text += " WITH UNMATCHED ROWS" 2958 rows = exp.var(text) 2959 else: 2960 rows = None 2961 2962 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2963 text = "AFTER MATCH SKIP" 2964 if self._match_text_seq("PAST", "LAST", "ROW"): 2965 text += " PAST LAST ROW" 2966 elif self._match_text_seq("TO", "NEXT", "ROW"): 2967 text += " TO NEXT ROW" 2968 elif self._match_text_seq("TO", "FIRST"): 2969 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2970 elif self._match_text_seq("TO", "LAST"): 2971 text += f" TO LAST {self._advance_any().text}" # type: ignore 2972 after = exp.var(text) 2973 else: 2974 after = None 2975 2976 if self._match_text_seq("PATTERN"): 2977 self._match_l_paren() 2978 2979 if not self._curr: 2980 self.raise_error("Expecting )", self._curr) 2981 2982 paren = 1 2983 start = self._curr 2984 2985 while self._curr and paren > 0: 2986 if self._curr.token_type == TokenType.L_PAREN: 2987 paren += 1 2988 if self._curr.token_type == TokenType.R_PAREN: 2989 paren -= 1 2990 2991 end = self._prev 2992 self._advance() 2993 2994 if paren > 0: 2995 self.raise_error("Expecting )", self._curr) 2996 2997 pattern = exp.var(self._find_sql(start, end)) 2998 else: 2999 pattern = None 3000 3001 define = ( 3002 self._parse_csv(self._parse_name_as_expression) 3003 if self._match_text_seq("DEFINE") 3004 else None 3005 ) 3006 3007 self._match_r_paren() 3008 3009 return self.expression( 3010 exp.MatchRecognize, 3011 partition_by=partition, 3012 order=order, 3013 measures=measures, 3014 rows=rows, 3015 after=after, 3016 pattern=pattern, 3017 define=define, 3018 alias=self._parse_table_alias(), 3019 ) 3020 3021 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3022 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3023 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3024 cross_apply = False 3025 3026 if cross_apply is not None: 3027 this = self._parse_select(table=True) 3028 view = None 3029 outer = None 3030 elif self._match(TokenType.LATERAL): 3031 this = self._parse_select(table=True) 3032 view = self._match(TokenType.VIEW) 3033 outer = self._match(TokenType.OUTER) 3034 else: 3035 return None 3036 3037 if not this: 3038 this = ( 3039 self._parse_unnest() 3040 or self._parse_function() 3041 or self._parse_id_var(any_token=False) 3042 ) 3043 3044 while self._match(TokenType.DOT): 3045 this = exp.Dot( 3046 this=this, 3047 expression=self._parse_function() or self._parse_id_var(any_token=False), 3048 ) 3049 3050 if view: 3051 table = self._parse_id_var(any_token=False) 3052 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3053 table_alias: t.Optional[exp.TableAlias] = self.expression( 3054 exp.TableAlias, this=table, columns=columns 3055 ) 3056 elif isinstance(this, (exp.Subquery, exp.Unnest)) and 
this.alias: 3057 # We move the alias from the lateral's child node to the lateral itself 3058 table_alias = this.args["alias"].pop() 3059 else: 3060 table_alias = self._parse_table_alias() 3061 3062 return self.expression( 3063 exp.Lateral, 3064 this=this, 3065 view=view, 3066 outer=outer, 3067 alias=table_alias, 3068 cross_apply=cross_apply, 3069 ) 3070 3071 def _parse_join_parts( 3072 self, 3073 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3074 return ( 3075 self._match_set(self.JOIN_METHODS) and self._prev, 3076 self._match_set(self.JOIN_SIDES) and self._prev, 3077 self._match_set(self.JOIN_KINDS) and self._prev, 3078 ) 3079 3080 def _parse_join( 3081 self, skip_join_token: bool = False, parse_bracket: bool = False 3082 ) -> t.Optional[exp.Join]: 3083 if self._match(TokenType.COMMA): 3084 return self.expression(exp.Join, this=self._parse_table()) 3085 3086 index = self._index 3087 method, side, kind = self._parse_join_parts() 3088 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3089 join = self._match(TokenType.JOIN) 3090 3091 if not skip_join_token and not join: 3092 self._retreat(index) 3093 kind = None 3094 method = None 3095 side = None 3096 3097 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3098 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3099 3100 if not skip_join_token and not join and not outer_apply and not cross_apply: 3101 return None 3102 3103 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3104 3105 if method: 3106 kwargs["method"] = method.text 3107 if side: 3108 kwargs["side"] = side.text 3109 if kind: 3110 kwargs["kind"] = kind.text 3111 if hint: 3112 kwargs["hint"] = hint 3113 3114 if self._match(TokenType.MATCH_CONDITION): 3115 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3116 3117 if self._match(TokenType.ON): 3118 kwargs["on"] = self._parse_conjunction() 3119 elif self._match(TokenType.USING): 3120 kwargs["using"] = self._parse_wrapped_id_vars() 3121 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3122 kind and kind.token_type == TokenType.CROSS 3123 ): 3124 index = self._index 3125 joins: t.Optional[list] = list(self._parse_joins()) 3126 3127 if joins and self._match(TokenType.ON): 3128 kwargs["on"] = self._parse_conjunction() 3129 elif joins and self._match(TokenType.USING): 3130 kwargs["using"] = self._parse_wrapped_id_vars() 3131 else: 3132 joins = None 3133 self._retreat(index) 3134 3135 kwargs["this"].set("joins", joins if joins else None) 3136 3137 comments = [c for token in (method, side, kind) if token for c in token.comments] 3138 return self.expression(exp.Join, comments=comments, **kwargs) 3139 3140 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3141 this = self._parse_conjunction() 3142 3143 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3144 return this 3145 3146 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3147 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3148 3149 return this 3150 3151 def _parse_index_params(self) -> exp.IndexParameters: 3152 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3153 3154 if self._match(TokenType.L_PAREN, advance=False): 3155 columns = self._parse_wrapped_csv(self._parse_with_operator) 3156 else: 3157 columns = None 3158 3159 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3160 partition_by 
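# Illustration: `_parse_join` records the method/side/kind tokens on the
# exp.Join node along with the ON or USING condition. A sketch:
#
#     from sqlglot import parse_one
#     q = parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
#     join = q.args["joins"][0]
#     join.side        # 'LEFT'
#     join.args["on"]  # the a.id = b.id condition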
= self._parse_partition_by() 3161 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3162 tablespace = ( 3163 self._parse_var(any_token=True) 3164 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3165 else None 3166 ) 3167 where = self._parse_where() 3168 3169 return self.expression( 3170 exp.IndexParameters, 3171 using=using, 3172 columns=columns, 3173 include=include, 3174 partition_by=partition_by, 3175 where=where, 3176 with_storage=with_storage, 3177 tablespace=tablespace, 3178 ) 3179 3180 def _parse_index( 3181 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3182 ) -> t.Optional[exp.Index]: 3183 if index or anonymous: 3184 unique = None 3185 primary = None 3186 amp = None 3187 3188 self._match(TokenType.ON) 3189 self._match(TokenType.TABLE) # hive 3190 table = self._parse_table_parts(schema=True) 3191 else: 3192 unique = self._match(TokenType.UNIQUE) 3193 primary = self._match_text_seq("PRIMARY") 3194 amp = self._match_text_seq("AMP") 3195 3196 if not self._match(TokenType.INDEX): 3197 return None 3198 3199 index = self._parse_id_var() 3200 table = None 3201 3202 params = self._parse_index_params() 3203 3204 return self.expression( 3205 exp.Index, 3206 this=index, 3207 table=table, 3208 unique=unique, 3209 primary=primary, 3210 amp=amp, 3211 params=params, 3212 ) 3213 3214 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3215 hints: t.List[exp.Expression] = [] 3216 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3217 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3218 hints.append( 3219 self.expression( 3220 exp.WithTableHint, 3221 expressions=self._parse_csv( 3222 lambda: self._parse_function() or self._parse_var(any_token=True) 3223 ), 3224 ) 3225 ) 3226 self._match_r_paren() 3227 else: 3228 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3229 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3230 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3231 3232 self._match_texts(("INDEX", "KEY")) 3233 if self._match(TokenType.FOR): 3234 hint.set("target", self._advance_any() and self._prev.text.upper()) 3235 3236 hint.set("expressions", self._parse_wrapped_id_vars()) 3237 hints.append(hint) 3238 3239 return hints or None 3240 3241 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3242 return ( 3243 (not schema and self._parse_function(optional_parens=False)) 3244 or self._parse_id_var(any_token=False) 3245 or self._parse_string_as_identifier() 3246 or self._parse_placeholder() 3247 ) 3248 3249 def _parse_table_parts( 3250 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3251 ) -> exp.Table: 3252 catalog = None 3253 db = None 3254 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3255 3256 while self._match(TokenType.DOT): 3257 if catalog: 3258 # This allows nesting the table in arbitrarily many dot expressions if needed 3259 table = self.expression( 3260 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3261 ) 3262 else: 3263 catalog = db 3264 db = table 3265 # "" used for tsql FROM a..b case 3266 table = self._parse_table_part(schema=schema) or "" 3267 3268 if ( 3269 wildcard 3270 and self._is_connected() 3271 and (isinstance(table, exp.Identifier) or not table) 3272 and self._match(TokenType.STAR) 3273 ): 3274 if isinstance(table, exp.Identifier): 3275 table.args["this"] += "*" 3276 else: 3277 table = 
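# Illustration: `_parse_index` is reached both from CREATE INDEX statements
# and from inline index definitions; the column list and storage options live
# under exp.IndexParameters. A sketch:
#
#     from sqlglot import exp, parse_one
#     c = parse_one("CREATE INDEX ix ON t (a, b)")
#     index = c.this        # exp.Index
#     index.args["params"]  # exp.IndexParameters with the column list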
exp.Identifier(this="*") 3278 3279 # We bubble up comments from the Identifier to the Table 3280 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3281 3282 if is_db_reference: 3283 catalog = db 3284 db = table 3285 table = None 3286 3287 if not table and not is_db_reference: 3288 self.raise_error(f"Expected table name but got {self._curr}") 3289 if not db and is_db_reference: 3290 self.raise_error(f"Expected database name but got {self._curr}") 3291 3292 return self.expression( 3293 exp.Table, 3294 comments=comments, 3295 this=table, 3296 db=db, 3297 catalog=catalog, 3298 pivots=self._parse_pivots(), 3299 ) 3300 3301 def _parse_table( 3302 self, 3303 schema: bool = False, 3304 joins: bool = False, 3305 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3306 parse_bracket: bool = False, 3307 is_db_reference: bool = False, 3308 parse_partition: bool = False, 3309 ) -> t.Optional[exp.Expression]: 3310 lateral = self._parse_lateral() 3311 if lateral: 3312 return lateral 3313 3314 unnest = self._parse_unnest() 3315 if unnest: 3316 return unnest 3317 3318 values = self._parse_derived_table_values() 3319 if values: 3320 return values 3321 3322 subquery = self._parse_select(table=True) 3323 if subquery: 3324 if not subquery.args.get("pivots"): 3325 subquery.set("pivots", self._parse_pivots()) 3326 return subquery 3327 3328 bracket = parse_bracket and self._parse_bracket(None) 3329 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3330 3331 only = self._match(TokenType.ONLY) 3332 3333 this = t.cast( 3334 exp.Expression, 3335 bracket 3336 or self._parse_bracket( 3337 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3338 ), 3339 ) 3340 3341 if only: 3342 this.set("only", only) 3343 3344 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3345 self._match_text_seq("*") 3346 3347 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3348 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3349 this.set("partition", self._parse_partition()) 3350 3351 if schema: 3352 return self._parse_schema(this=this) 3353 3354 version = self._parse_version() 3355 3356 if version: 3357 this.set("version", version) 3358 3359 if self.dialect.ALIAS_POST_TABLESAMPLE: 3360 table_sample = self._parse_table_sample() 3361 3362 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3363 if alias: 3364 this.set("alias", alias) 3365 3366 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3367 return self.expression( 3368 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3369 ) 3370 3371 this.set("hints", self._parse_table_hints()) 3372 3373 if not this.args.get("pivots"): 3374 this.set("pivots", self._parse_pivots()) 3375 3376 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3377 table_sample = self._parse_table_sample() 3378 3379 if table_sample: 3380 table_sample.set("this", this) 3381 this = table_sample 3382 3383 if joins: 3384 for join in self._parse_joins(): 3385 this.append("joins", join) 3386 3387 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3388 this.set("ordinality", True) 3389 this.set("alias", self._parse_table_alias()) 3390 3391 return this 3392 3393 def _parse_version(self) -> t.Optional[exp.Version]: 3394 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3395 this = "TIMESTAMP" 3396 elif self._match(TokenType.VERSION_SNAPSHOT): 3397 this = "VERSION" 3398 else: 3399 return None 3400 3401 
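# Illustration: `_parse_table_parts` folds dotted names into
# catalog/db/table, nesting deeper references via exp.Dot. A sketch:
#
#     from sqlglot import exp, parse_one
#     tbl = parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     tbl.catalog, tbl.db, tbl.name  # ('c', 'd', 't')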
if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3402 kind = self._prev.text.upper() 3403 start = self._parse_bitwise() 3404 self._match_texts(("TO", "AND")) 3405 end = self._parse_bitwise() 3406 expression: t.Optional[exp.Expression] = self.expression( 3407 exp.Tuple, expressions=[start, end] 3408 ) 3409 elif self._match_text_seq("CONTAINED", "IN"): 3410 kind = "CONTAINED IN" 3411 expression = self.expression( 3412 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3413 ) 3414 elif self._match(TokenType.ALL): 3415 kind = "ALL" 3416 expression = None 3417 else: 3418 self._match_text_seq("AS", "OF") 3419 kind = "AS OF" 3420 expression = self._parse_type() 3421 3422 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3423 3424 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3425 if not self._match(TokenType.UNNEST): 3426 return None 3427 3428 expressions = self._parse_wrapped_csv(self._parse_equality) 3429 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3430 3431 alias = self._parse_table_alias() if with_alias else None 3432 3433 if alias: 3434 if self.dialect.UNNEST_COLUMN_ONLY: 3435 if alias.args.get("columns"): 3436 self.raise_error("Unexpected extra column alias in unnest.") 3437 3438 alias.set("columns", [alias.this]) 3439 alias.set("this", None) 3440 3441 columns = alias.args.get("columns") or [] 3442 if offset and len(expressions) < len(columns): 3443 offset = columns.pop() 3444 3445 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3446 self._match(TokenType.ALIAS) 3447 offset = self._parse_id_var( 3448 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3449 ) or exp.to_identifier("offset") 3450 3451 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3452 3453 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3454 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3455 if not is_derived and not self._match_text_seq("VALUES"): 3456 return None 3457 3458 expressions = self._parse_csv(self._parse_value) 3459 alias = self._parse_table_alias() 3460 3461 if is_derived: 3462 self._match_r_paren() 3463 3464 return self.expression( 3465 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3466 ) 3467 3468 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3469 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3470 as_modifier and self._match_text_seq("USING", "SAMPLE") 3471 ): 3472 return None 3473 3474 bucket_numerator = None 3475 bucket_denominator = None 3476 bucket_field = None 3477 percent = None 3478 size = None 3479 seed = None 3480 3481 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3482 matched_l_paren = self._match(TokenType.L_PAREN) 3483 3484 if self.TABLESAMPLE_CSV: 3485 num = None 3486 expressions = self._parse_csv(self._parse_primary) 3487 else: 3488 expressions = None 3489 num = ( 3490 self._parse_factor() 3491 if self._match(TokenType.NUMBER, advance=False) 3492 else self._parse_primary() or self._parse_placeholder() 3493 ) 3494 3495 if self._match_text_seq("BUCKET"): 3496 bucket_numerator = self._parse_number() 3497 self._match_text_seq("OUT", "OF") 3498 bucket_denominator = self._parse_number() 3499 self._match(TokenType.ON) 3500 bucket_field = self._parse_field() 3501 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3502 percent = num 3503 elif self._match(TokenType.ROWS) or 
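# Illustration: `_parse_unnest` normalizes aliases for UNNEST_COLUMN_ONLY
# dialects such as BigQuery, where the alias names the produced column rather
# than the derived table. A sketch:
#
#     from sqlglot import exp, parse_one
#     q = parse_one("SELECT x FROM UNNEST([1, 2, 3]) AS x", read="bigquery")
#     unnest = q.find(exp.Unnest)
#     unnest.args["alias"].args["columns"]  # [Identifier(x)]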
not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3504 size = num 3505 else: 3506 percent = num 3507 3508 if matched_l_paren: 3509 self._match_r_paren() 3510 3511 if self._match(TokenType.L_PAREN): 3512 method = self._parse_var(upper=True) 3513 seed = self._match(TokenType.COMMA) and self._parse_number() 3514 self._match_r_paren() 3515 elif self._match_texts(("SEED", "REPEATABLE")): 3516 seed = self._parse_wrapped(self._parse_number) 3517 3518 if not method and self.DEFAULT_SAMPLING_METHOD: 3519 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3520 3521 return self.expression( 3522 exp.TableSample, 3523 expressions=expressions, 3524 method=method, 3525 bucket_numerator=bucket_numerator, 3526 bucket_denominator=bucket_denominator, 3527 bucket_field=bucket_field, 3528 percent=percent, 3529 size=size, 3530 seed=seed, 3531 ) 3532 3533 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3534 return list(iter(self._parse_pivot, None)) or None 3535 3536 def _parse_joins(self) -> t.Iterator[exp.Join]: 3537 return iter(self._parse_join, None) 3538 3539 # https://duckdb.org/docs/sql/statements/pivot 3540 def _parse_simplified_pivot(self) -> exp.Pivot: 3541 def _parse_on() -> t.Optional[exp.Expression]: 3542 this = self._parse_bitwise() 3543 return self._parse_in(this) if self._match(TokenType.IN) else this 3544 3545 this = self._parse_table() 3546 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3547 using = self._match(TokenType.USING) and self._parse_csv( 3548 lambda: self._parse_alias(self._parse_function()) 3549 ) 3550 group = self._parse_group() 3551 return self.expression( 3552 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3553 ) 3554 3555 def _parse_pivot_in(self) -> exp.In: 3556 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3557 this = self._parse_conjunction() 3558 3559 self._match(TokenType.ALIAS) 3560 alias = self._parse_field() 3561 if alias: 3562 return self.expression(exp.PivotAlias, this=this, alias=alias) 3563 3564 return this 3565 3566 value = self._parse_column() 3567 3568 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3569 self.raise_error("Expecting IN (") 3570 3571 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3572 3573 self._match_r_paren() 3574 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3575 3576 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3577 index = self._index 3578 include_nulls = None 3579 3580 if self._match(TokenType.PIVOT): 3581 unpivot = False 3582 elif self._match(TokenType.UNPIVOT): 3583 unpivot = True 3584 3585 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3586 if self._match_text_seq("INCLUDE", "NULLS"): 3587 include_nulls = True 3588 elif self._match_text_seq("EXCLUDE", "NULLS"): 3589 include_nulls = False 3590 else: 3591 return None 3592 3593 expressions = [] 3594 3595 if not self._match(TokenType.L_PAREN): 3596 self._retreat(index) 3597 return None 3598 3599 if unpivot: 3600 expressions = self._parse_csv(self._parse_column) 3601 else: 3602 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3603 3604 if not expressions: 3605 self.raise_error("Failed to parse PIVOT's aggregation list") 3606 3607 if not self._match(TokenType.FOR): 3608 self.raise_error("Expecting FOR") 3609 3610 field = self._parse_pivot_in() 3611 3612 self._match_r_paren() 3613 3614 pivot = self.expression( 3615 exp.Pivot, 3616 expressions=expressions, 3617 field=field, 3618 
unpivot=unpivot, 3619 include_nulls=include_nulls, 3620 ) 3621 3622 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3623 pivot.set("alias", self._parse_table_alias()) 3624 3625 if not unpivot: 3626 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3627 3628 columns: t.List[exp.Expression] = [] 3629 for fld in pivot.args["field"].expressions: 3630 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3631 for name in names: 3632 if self.PREFIXED_PIVOT_COLUMNS: 3633 name = f"{name}_{field_name}" if name else field_name 3634 else: 3635 name = f"{field_name}_{name}" if name else field_name 3636 3637 columns.append(exp.to_identifier(name)) 3638 3639 pivot.set("columns", columns) 3640 3641 return pivot 3642 3643 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3644 return [agg.alias for agg in aggregations] 3645 3646 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3647 if not skip_where_token and not self._match(TokenType.PREWHERE): 3648 return None 3649 3650 return self.expression( 3651 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3652 ) 3653 3654 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3655 if not skip_where_token and not self._match(TokenType.WHERE): 3656 return None 3657 3658 return self.expression( 3659 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3660 ) 3661 3662 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3663 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3664 return None 3665 3666 elements: t.Dict[str, t.Any] = defaultdict(list) 3667 3668 if self._match(TokenType.ALL): 3669 elements["all"] = True 3670 elif self._match(TokenType.DISTINCT): 3671 elements["all"] = False 3672 3673 while True: 3674 expressions = self._parse_csv( 3675 lambda: None 3676 if self._match(TokenType.ROLLUP, advance=False) 3677 else self._parse_conjunction() 3678 ) 3679 if expressions: 3680 elements["expressions"].extend(expressions) 3681 3682 grouping_sets = self._parse_grouping_sets() 3683 if grouping_sets: 3684 elements["grouping_sets"].extend(grouping_sets) 3685 3686 rollup = None 3687 cube = None 3688 totals = None 3689 3690 index = self._index 3691 with_ = self._match(TokenType.WITH) 3692 if self._match(TokenType.ROLLUP): 3693 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3694 elements["rollup"].extend(ensure_list(rollup)) 3695 3696 if self._match(TokenType.CUBE): 3697 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3698 elements["cube"].extend(ensure_list(cube)) 3699 3700 if self._match_text_seq("TOTALS"): 3701 totals = True 3702 elements["totals"] = True # type: ignore 3703 3704 if not (grouping_sets or rollup or cube or totals): 3705 if with_: 3706 self._retreat(index) 3707 break 3708 3709 return self.expression(exp.Group, **elements) # type: ignore 3710 3711 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3712 if not self._match(TokenType.GROUPING_SETS): 3713 return None 3714 3715 return self._parse_wrapped_csv(self._parse_grouping_set) 3716 3717 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3718 if self._match(TokenType.L_PAREN): 3719 grouping_set = self._parse_csv(self._parse_column) 3720 self._match_r_paren() 3721 return self.expression(exp.Tuple, expressions=grouping_set) 3722 3723 return self._parse_column() 3724 3725 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3726 if not skip_having_token and not self._match(TokenType.HAVING): 3727 return None 3728 return self.expression(exp.Having, this=self._parse_conjunction()) 3729 3730 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3731 if not self._match(TokenType.QUALIFY): 3732 return None 3733 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3734 3735 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3736 if skip_start_token: 3737 start = None 3738 elif self._match(TokenType.START_WITH): 3739 start = self._parse_conjunction() 3740 else: 3741 return None 3742 3743 self._match(TokenType.CONNECT_BY) 3744 nocycle = self._match_text_seq("NOCYCLE") 3745 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3746 exp.Prior, this=self._parse_bitwise() 3747 ) 3748 connect = self._parse_conjunction() 3749 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3750 3751 if not start and self._match(TokenType.START_WITH): 3752 start = self._parse_conjunction() 3753 3754 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3755 3756 def _parse_name_as_expression(self) -> exp.Alias: 3757 return self.expression( 3758 exp.Alias, 3759 alias=self._parse_id_var(any_token=True), 3760 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3761 ) 3762 3763 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3764 if self._match_text_seq("INTERPOLATE"): 3765 return self._parse_wrapped_csv(self._parse_name_as_expression) 3766 return None 3767 3768 def _parse_order( 3769 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3770 ) -> t.Optional[exp.Expression]: 3771 siblings = None 3772 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3773 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3774 return this 3775 3776 siblings = True 3777 3778 return self.expression( 3779 exp.Order, 3780 this=this, 3781 expressions=self._parse_csv(self._parse_ordered), 3782 interpolate=self._parse_interpolate(), 3783 siblings=siblings, 3784 ) 3785 3786 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3787 if not self._match(token): 3788 return None 3789 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3790 3791 def _parse_ordered( 3792 self, parse_method: t.Optional[t.Callable] = None 3793 ) -> t.Optional[exp.Ordered]: 3794 this = parse_method() if parse_method else self._parse_conjunction() 3795 if not this: 3796 return None 3797 3798 asc = self._match(TokenType.ASC) 3799 desc = self._match(TokenType.DESC) or (asc and False) 3800 3801 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3802 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3803 3804 nulls_first = is_nulls_first or False 3805 explicitly_null_ordered = is_nulls_first or is_nulls_last 3806 3807 if ( 3808 not explicitly_null_ordered 3809 and ( 3810 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3811 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3812 ) 3813 and self.dialect.NULL_ORDERING != "nulls_are_last" 3814 ): 3815 nulls_first = True 3816 3817 if self._match_text_seq("WITH", "FILL"): 3818 with_fill = self.expression( 3819 exp.WithFill, 3820 **{ # type: ignore 3821 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3822 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3823 "step": self._match_text_seq("STEP") and 
self._parse_bitwise(), 3824 }, 3825 ) 3826 else: 3827 with_fill = None 3828 3829 return self.expression( 3830 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3831 ) 3832 3833 def _parse_limit( 3834 self, 3835 this: t.Optional[exp.Expression] = None, 3836 top: bool = False, 3837 skip_limit_token: bool = False, 3838 ) -> t.Optional[exp.Expression]: 3839 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3840 comments = self._prev_comments 3841 if top: 3842 limit_paren = self._match(TokenType.L_PAREN) 3843 expression = self._parse_term() if limit_paren else self._parse_number() 3844 3845 if limit_paren: 3846 self._match_r_paren() 3847 else: 3848 expression = self._parse_term() 3849 3850 if self._match(TokenType.COMMA): 3851 offset = expression 3852 expression = self._parse_term() 3853 else: 3854 offset = None 3855 3856 limit_exp = self.expression( 3857 exp.Limit, 3858 this=this, 3859 expression=expression, 3860 offset=offset, 3861 comments=comments, 3862 expressions=self._parse_limit_by(), 3863 ) 3864 3865 return limit_exp 3866 3867 if self._match(TokenType.FETCH): 3868 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3869 direction = self._prev.text.upper() if direction else "FIRST" 3870 3871 count = self._parse_field(tokens=self.FETCH_TOKENS) 3872 percent = self._match(TokenType.PERCENT) 3873 3874 self._match_set((TokenType.ROW, TokenType.ROWS)) 3875 3876 only = self._match_text_seq("ONLY") 3877 with_ties = self._match_text_seq("WITH", "TIES") 3878 3879 if only and with_ties: 3880 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3881 3882 return self.expression( 3883 exp.Fetch, 3884 direction=direction, 3885 count=count, 3886 percent=percent, 3887 with_ties=with_ties, 3888 ) 3889 3890 return this 3891 3892 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3893 if not self._match(TokenType.OFFSET): 3894 return this 3895 3896 count = self._parse_term() 3897 self._match_set((TokenType.ROW, TokenType.ROWS)) 3898 3899 return self.expression( 3900 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3901 ) 3902 3903 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3904 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3905 3906 def _parse_locks(self) -> t.List[exp.Lock]: 3907 locks = [] 3908 while True: 3909 if self._match_text_seq("FOR", "UPDATE"): 3910 update = True 3911 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3912 "LOCK", "IN", "SHARE", "MODE" 3913 ): 3914 update = False 3915 else: 3916 break 3917 3918 expressions = None 3919 if self._match_text_seq("OF"): 3920 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3921 3922 wait: t.Optional[bool | exp.Expression] = None 3923 if self._match_text_seq("NOWAIT"): 3924 wait = True 3925 elif self._match_text_seq("WAIT"): 3926 wait = self._parse_primary() 3927 elif self._match_text_seq("SKIP", "LOCKED"): 3928 wait = False 3929 3930 locks.append( 3931 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3932 ) 3933 3934 return locks 3935 3936 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3937 while this and self._match_set(self.SET_OPERATIONS): 3938 token_type = self._prev.token_type 3939 3940 if token_type == TokenType.UNION: 3941 operation = exp.Union 3942 elif token_type == TokenType.EXCEPT: 3943 operation = exp.Except 3944 
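            # SET_OPERATIONS is assumed to contain only UNION, EXCEPT and INTERSECT,
            # so the remaining match here is INTERSECT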
else: 3945 operation = exp.Intersect 3946 3947 comments = self._prev.comments 3948 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3949 by_name = self._match_text_seq("BY", "NAME") 3950 expression = self._parse_select(nested=True, parse_set_operation=False) 3951 3952 this = self.expression( 3953 operation, 3954 comments=comments, 3955 this=this, 3956 distinct=distinct, 3957 by_name=by_name, 3958 expression=expression, 3959 ) 3960 3961 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3962 expression = this.expression 3963 3964 if expression: 3965 for arg in self.UNION_MODIFIERS: 3966 expr = expression.args.get(arg) 3967 if expr: 3968 this.set(arg, expr.pop()) 3969 3970 return this 3971 3972 def _parse_expression(self) -> t.Optional[exp.Expression]: 3973 return self._parse_alias(self._parse_conjunction()) 3974 3975 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3976 this = self._parse_equality() 3977 3978 if self._match(TokenType.COLON_EQ): 3979 this = self.expression( 3980 exp.PropertyEQ, 3981 this=this, 3982 comments=self._prev_comments, 3983 expression=self._parse_conjunction(), 3984 ) 3985 3986 while self._match_set(self.CONJUNCTION): 3987 this = self.expression( 3988 self.CONJUNCTION[self._prev.token_type], 3989 this=this, 3990 comments=self._prev_comments, 3991 expression=self._parse_equality(), 3992 ) 3993 return this 3994 3995 def _parse_equality(self) -> t.Optional[exp.Expression]: 3996 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3997 3998 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3999 return self._parse_tokens(self._parse_range, self.COMPARISON) 4000 4001 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4002 this = this or self._parse_bitwise() 4003 negate = self._match(TokenType.NOT) 4004 4005 if self._match_set(self.RANGE_PARSERS): 4006 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4007 if not expression: 4008 return this 4009 4010 this = expression 4011 elif self._match(TokenType.ISNULL): 4012 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4013 4014 # Postgres supports ISNULL and NOTNULL for conditions. 
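        # e.g. "x NOTNULL" is rewritten below as NOT (x IS NULL)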
4015 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4016 if self._match(TokenType.NOTNULL): 4017 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4018 this = self.expression(exp.Not, this=this) 4019 4020 if negate: 4021 this = self.expression(exp.Not, this=this) 4022 4023 if self._match(TokenType.IS): 4024 this = self._parse_is(this) 4025 4026 return this 4027 4028 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4029 index = self._index - 1 4030 negate = self._match(TokenType.NOT) 4031 4032 if self._match_text_seq("DISTINCT", "FROM"): 4033 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4034 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4035 4036 expression = self._parse_null() or self._parse_boolean() 4037 if not expression: 4038 self._retreat(index) 4039 return None 4040 4041 this = self.expression(exp.Is, this=this, expression=expression) 4042 return self.expression(exp.Not, this=this) if negate else this 4043 4044 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4045 unnest = self._parse_unnest(with_alias=False) 4046 if unnest: 4047 this = self.expression(exp.In, this=this, unnest=unnest) 4048 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4049 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4050 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4051 4052 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4053 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4054 else: 4055 this = self.expression(exp.In, this=this, expressions=expressions) 4056 4057 if matched_l_paren: 4058 self._match_r_paren(this) 4059 elif not self._match(TokenType.R_BRACKET, expression=this): 4060 self.raise_error("Expecting ]") 4061 else: 4062 this = self.expression(exp.In, this=this, field=self._parse_field()) 4063 4064 return this 4065 4066 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4067 low = self._parse_bitwise() 4068 self._match(TokenType.AND) 4069 high = self._parse_bitwise() 4070 return self.expression(exp.Between, this=this, low=low, high=high) 4071 4072 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4073 if not self._match(TokenType.ESCAPE): 4074 return this 4075 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4076 4077 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4078 index = self._index 4079 4080 if not self._match(TokenType.INTERVAL) and match_interval: 4081 return None 4082 4083 if self._match(TokenType.STRING, advance=False): 4084 this = self._parse_primary() 4085 else: 4086 this = self._parse_term() 4087 4088 if not this or ( 4089 isinstance(this, exp.Column) 4090 and not this.table 4091 and not this.this.quoted 4092 and this.name.upper() == "IS" 4093 ): 4094 self._retreat(index) 4095 return None 4096 4097 unit = self._parse_function() or ( 4098 not self._match(TokenType.ALIAS, advance=False) 4099 and self._parse_var(any_token=True, upper=True) 4100 ) 4101 4102 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4103 # each INTERVAL expression into this canonical form so it's easy to transpile 4104 if this and this.is_number: 4105 this = exp.Literal.string(this.name) 4106 elif this and this.is_string: 4107 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4108 if 
len(parts) == 1: 4109 if unit: 4110 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4111 self._retreat(self._index - 1) 4112 4113 this = exp.Literal.string(parts[0][0]) 4114 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4115 4116 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4117 unit = self.expression( 4118 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4119 ) 4120 4121 interval = self.expression(exp.Interval, this=this, unit=unit) 4122 4123 index = self._index 4124 self._match(TokenType.PLUS) 4125 4126 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4127 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4128 return self.expression( 4129 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4130 ) 4131 4132 self._retreat(index) 4133 return interval 4134 4135 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4136 this = self._parse_term() 4137 4138 while True: 4139 if self._match_set(self.BITWISE): 4140 this = self.expression( 4141 self.BITWISE[self._prev.token_type], 4142 this=this, 4143 expression=self._parse_term(), 4144 ) 4145 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4146 this = self.expression( 4147 exp.DPipe, 4148 this=this, 4149 expression=self._parse_term(), 4150 safe=not self.dialect.STRICT_STRING_CONCAT, 4151 ) 4152 elif self._match(TokenType.DQMARK): 4153 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4154 elif self._match_pair(TokenType.LT, TokenType.LT): 4155 this = self.expression( 4156 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4157 ) 4158 elif self._match_pair(TokenType.GT, TokenType.GT): 4159 this = self.expression( 4160 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4161 ) 4162 else: 4163 break 4164 4165 return this 4166 4167 def _parse_term(self) -> t.Optional[exp.Expression]: 4168 return self._parse_tokens(self._parse_factor, self.TERM) 4169 4170 def _parse_factor(self) -> t.Optional[exp.Expression]: 4171 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4172 this = parse_method() 4173 4174 while self._match_set(self.FACTOR): 4175 this = self.expression( 4176 self.FACTOR[self._prev.token_type], 4177 this=this, 4178 comments=self._prev_comments, 4179 expression=parse_method(), 4180 ) 4181 if isinstance(this, exp.Div): 4182 this.args["typed"] = self.dialect.TYPED_DIVISION 4183 this.args["safe"] = self.dialect.SAFE_DIVISION 4184 4185 return this 4186 4187 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4188 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4189 4190 def _parse_unary(self) -> t.Optional[exp.Expression]: 4191 if self._match_set(self.UNARY_PARSERS): 4192 return self.UNARY_PARSERS[self._prev.token_type](self) 4193 return self._parse_at_time_zone(self._parse_type()) 4194 4195 def _parse_type( 4196 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4197 ) -> t.Optional[exp.Expression]: 4198 interval = parse_interval and self._parse_interval() 4199 if interval: 4200 return interval 4201 4202 index = self._index 4203 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4204 4205 if data_type: 4206 index2 = self._index 4207 this = self._parse_primary() 4208 4209 if isinstance(this, exp.Literal): 4210 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4211 if parser: 4212 return parser(self, this, 
data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
4307 ) 4308 ) 4309 elif type_token in self.ENUM_TYPE_TOKENS: 4310 expressions = self._parse_csv(self._parse_equality) 4311 elif is_aggregate: 4312 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4313 any_token=False, tokens=(TokenType.VAR,) 4314 ) 4315 if not func_or_ident or not self._match(TokenType.COMMA): 4316 return None 4317 expressions = self._parse_csv( 4318 lambda: self._parse_types( 4319 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4320 ) 4321 ) 4322 expressions.insert(0, func_or_ident) 4323 else: 4324 expressions = self._parse_csv(self._parse_type_size) 4325 4326 if not expressions or not self._match(TokenType.R_PAREN): 4327 self._retreat(index) 4328 return None 4329 4330 maybe_func = True 4331 4332 values: t.Optional[t.List[exp.Expression]] = None 4333 4334 if nested and self._match(TokenType.LT): 4335 if is_struct: 4336 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4337 else: 4338 expressions = self._parse_csv( 4339 lambda: self._parse_types( 4340 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4341 ) 4342 ) 4343 4344 if not self._match(TokenType.GT): 4345 self.raise_error("Expecting >") 4346 4347 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4348 values = self._parse_csv(self._parse_conjunction) 4349 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4350 4351 if type_token in self.TIMESTAMPS: 4352 if self._match_text_seq("WITH", "TIME", "ZONE"): 4353 maybe_func = False 4354 tz_type = ( 4355 exp.DataType.Type.TIMETZ 4356 if type_token in self.TIMES 4357 else exp.DataType.Type.TIMESTAMPTZ 4358 ) 4359 this = exp.DataType(this=tz_type, expressions=expressions) 4360 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4361 maybe_func = False 4362 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4363 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4364 maybe_func = False 4365 elif type_token == TokenType.INTERVAL: 4366 unit = self._parse_var(upper=True) 4367 if unit: 4368 if self._match_text_seq("TO"): 4369 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4370 4371 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4372 else: 4373 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4374 4375 if maybe_func and check_func: 4376 index2 = self._index 4377 peek = self._parse_string() 4378 4379 if not peek: 4380 self._retreat(index) 4381 return None 4382 4383 self._retreat(index2) 4384 4385 if not this: 4386 if self._match_text_seq("UNSIGNED"): 4387 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4388 if not unsigned_type_token: 4389 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4390 4391 type_token = unsigned_type_token or type_token 4392 4393 this = exp.DataType( 4394 this=exp.DataType.Type[type_token.value], 4395 expressions=expressions, 4396 nested=nested, 4397 values=values, 4398 prefix=prefix, 4399 ) 4400 elif expressions: 4401 this.set("expressions", expressions) 4402 4403 index = self._index 4404 4405 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4406 matched_array = self._match(TokenType.ARRAY) 4407 4408 while self._curr: 4409 matched_l_bracket = self._match(TokenType.L_BRACKET) 4410 if not matched_l_bracket and not matched_array: 4411 break 4412 4413 matched_array = False 4414 values = self._parse_csv(self._parse_conjunction) or None 4415 if 
values and not schema: 4416 self._retreat(index) 4417 break 4418 4419 this = exp.DataType( 4420 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4421 ) 4422 self._match(TokenType.R_BRACKET) 4423 4424 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4425 converter = self.TYPE_CONVERTER.get(this.this) 4426 if converter: 4427 this = converter(t.cast(exp.DataType, this)) 4428 4429 return this 4430 4431 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4432 index = self._index 4433 this = ( 4434 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4435 or self._parse_id_var() 4436 ) 4437 self._match(TokenType.COLON) 4438 4439 if ( 4440 type_required 4441 and not isinstance(this, exp.DataType) 4442 and not self._match_set(self.TYPE_TOKENS, advance=False) 4443 ): 4444 self._retreat(index) 4445 return self._parse_types() 4446 4447 return self._parse_column_def(this) 4448 4449 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4450 if not self._match_text_seq("AT", "TIME", "ZONE"): 4451 return this 4452 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4453 4454 def _parse_column(self) -> t.Optional[exp.Expression]: 4455 this = self._parse_column_reference() 4456 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4457 4458 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4459 this = self._parse_field() 4460 if ( 4461 not this 4462 and self._match(TokenType.VALUES, advance=False) 4463 and self.VALUES_FOLLOWED_BY_PAREN 4464 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4465 ): 4466 this = self._parse_id_var() 4467 4468 if isinstance(this, exp.Identifier): 4469 # We bubble up comments from the Identifier to the Column 4470 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4471 4472 return this 4473 4474 def _parse_colon_as_json_extract( 4475 self, this: t.Optional[exp.Expression] 4476 ) -> t.Optional[exp.Expression]: 4477 casts = [] 4478 json_path = [] 4479 4480 while self._match(TokenType.COLON): 4481 start_index = self._index 4482 path = self._parse_column_ops(self._parse_field(any_token=True)) 4483 4484 # The cast :: operator has a lower precedence than the extraction operator :, so 4485 # we rearrange the AST appropriately to avoid casting the JSON path 4486 while isinstance(path, exp.Cast): 4487 casts.append(path.to) 4488 path = path.this 4489 4490 if casts: 4491 dcolon_offset = next( 4492 i 4493 for i, t in enumerate(self._tokens[start_index:]) 4494 if t.token_type == TokenType.DCOLON 4495 ) 4496 end_token = self._tokens[start_index + dcolon_offset - 1] 4497 else: 4498 end_token = self._prev 4499 4500 if path: 4501 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4502 4503 if json_path: 4504 this = self.expression( 4505 exp.JSONExtract, 4506 this=this, 4507 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4508 ) 4509 4510 while casts: 4511 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4512 4513 return this 4514 4515 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4516 this = self._parse_bracket(this) 4517 4518 while self._match_set(self.COLUMN_OPERATORS): 4519 op_token = self._prev.token_type 4520 op = self.COLUMN_OPERATORS.get(op_token) 4521 4522 if op_token == TokenType.DCOLON: 4523 field = self._parse_types() 4524 if not field: 
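                    # The :: operator must be followed by a type, e.g. x::INT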
4525 self.raise_error("Expected type") 4526 elif op and self._curr: 4527 field = self._parse_column_reference() 4528 else: 4529 field = self._parse_field(any_token=True, anonymous_func=True) 4530 4531 if isinstance(field, exp.Func) and this: 4532 # bigquery allows function calls like x.y.count(...) 4533 # SAFE.SUBSTR(...) 4534 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4535 this = exp.replace_tree( 4536 this, 4537 lambda n: ( 4538 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4539 if n.table 4540 else n.this 4541 ) 4542 if isinstance(n, exp.Column) 4543 else n, 4544 ) 4545 4546 if op: 4547 this = op(self, this, field) 4548 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4549 this = self.expression( 4550 exp.Column, 4551 this=field, 4552 table=this.this, 4553 db=this.args.get("table"), 4554 catalog=this.args.get("db"), 4555 ) 4556 else: 4557 this = self.expression(exp.Dot, this=this, expression=field) 4558 4559 this = self._parse_bracket(this) 4560 4561 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4562 4563 def _parse_primary(self) -> t.Optional[exp.Expression]: 4564 if self._match_set(self.PRIMARY_PARSERS): 4565 token_type = self._prev.token_type 4566 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4567 4568 if token_type == TokenType.STRING: 4569 expressions = [primary] 4570 while self._match(TokenType.STRING): 4571 expressions.append(exp.Literal.string(self._prev.text)) 4572 4573 if len(expressions) > 1: 4574 return self.expression(exp.Concat, expressions=expressions) 4575 4576 return primary 4577 4578 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4579 return exp.Literal.number(f"0.{self._prev.text}") 4580 4581 if self._match(TokenType.L_PAREN): 4582 comments = self._prev_comments 4583 query = self._parse_select() 4584 4585 if query: 4586 expressions = [query] 4587 else: 4588 expressions = self._parse_expressions() 4589 4590 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4591 4592 if not this and self._match(TokenType.R_PAREN, advance=False): 4593 this = self.expression(exp.Tuple) 4594 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4595 this = self._parse_subquery(this=this, parse_alias=False) 4596 elif isinstance(this, exp.Subquery): 4597 this = self._parse_subquery( 4598 this=self._parse_set_operations(this), parse_alias=False 4599 ) 4600 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4601 this = self.expression(exp.Tuple, expressions=expressions) 4602 else: 4603 this = self.expression(exp.Paren, this=this) 4604 4605 if this: 4606 this.add_comments(comments) 4607 4608 self._match_r_paren(expression=this) 4609 return this 4610 4611 return None 4612 4613 def _parse_field( 4614 self, 4615 any_token: bool = False, 4616 tokens: t.Optional[t.Collection[TokenType]] = None, 4617 anonymous_func: bool = False, 4618 ) -> t.Optional[exp.Expression]: 4619 if anonymous_func: 4620 field = ( 4621 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4622 or self._parse_primary() 4623 ) 4624 else: 4625 field = self._parse_primary() or self._parse_function( 4626 anonymous=anonymous_func, any_token=any_token 4627 ) 4628 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4629 4630 def _parse_function( 4631 self, 4632 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4633 anonymous: bool = False, 4634 optional_parens: bool = True, 4635 any_token: bool = False, 4636 ) 
-> t.Optional[exp.Expression]: 4637 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4638 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4639 fn_syntax = False 4640 if ( 4641 self._match(TokenType.L_BRACE, advance=False) 4642 and self._next 4643 and self._next.text.upper() == "FN" 4644 ): 4645 self._advance(2) 4646 fn_syntax = True 4647 4648 func = self._parse_function_call( 4649 functions=functions, 4650 anonymous=anonymous, 4651 optional_parens=optional_parens, 4652 any_token=any_token, 4653 ) 4654 4655 if fn_syntax: 4656 self._match(TokenType.R_BRACE) 4657 4658 return func 4659 4660 def _parse_function_call( 4661 self, 4662 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4663 anonymous: bool = False, 4664 optional_parens: bool = True, 4665 any_token: bool = False, 4666 ) -> t.Optional[exp.Expression]: 4667 if not self._curr: 4668 return None 4669 4670 comments = self._curr.comments 4671 token_type = self._curr.token_type 4672 this = self._curr.text 4673 upper = this.upper() 4674 4675 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4676 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4677 self._advance() 4678 return self._parse_window(parser(self)) 4679 4680 if not self._next or self._next.token_type != TokenType.L_PAREN: 4681 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4682 self._advance() 4683 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4684 4685 return None 4686 4687 if any_token: 4688 if token_type in self.RESERVED_TOKENS: 4689 return None 4690 elif token_type not in self.FUNC_TOKENS: 4691 return None 4692 4693 self._advance(2) 4694 4695 parser = self.FUNCTION_PARSERS.get(upper) 4696 if parser and not anonymous: 4697 this = parser(self) 4698 else: 4699 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4700 4701 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4702 this = self.expression(subquery_predicate, this=self._parse_select()) 4703 self._match_r_paren() 4704 return this 4705 4706 if functions is None: 4707 functions = self.FUNCTIONS 4708 4709 function = functions.get(upper) 4710 4711 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4712 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4713 4714 if alias: 4715 args = self._kv_to_prop_eq(args) 4716 4717 if function and not anonymous: 4718 if "dialect" in function.__code__.co_varnames: 4719 func = function(args, dialect=self.dialect) 4720 else: 4721 func = function(args) 4722 4723 func = self.validate_expression(func, args) 4724 if not self.dialect.NORMALIZE_FUNCTIONS: 4725 func.meta["name"] = this 4726 4727 this = func 4728 else: 4729 if token_type == TokenType.IDENTIFIER: 4730 this = exp.Identifier(this=this, quoted=True) 4731 this = self.expression(exp.Anonymous, this=this, expressions=args) 4732 4733 if isinstance(this, exp.Expression): 4734 this.add_comments(comments) 4735 4736 self._match_r_paren(this) 4737 return self._parse_window(this) 4738 4739 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4740 transformed = [] 4741 4742 for e in expressions: 4743 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4744 if isinstance(e, exp.Alias): 4745 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4746 4747 if not isinstance(e, exp.PropertyEQ): 4748 e = self.expression( 4749 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4750 ) 4751 
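                # Here e is a key/value pair normalized to a PropertyEQ (e.g. a := 1);
                # if its key was parsed as a Column, unwrap it to the bare Identifier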
4752 if isinstance(e.this, exp.Column): 4753 e.this.replace(e.this.this) 4754 4755 transformed.append(e) 4756 4757 return transformed 4758 4759 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4760 return self._parse_column_def(self._parse_id_var()) 4761 4762 def _parse_user_defined_function( 4763 self, kind: t.Optional[TokenType] = None 4764 ) -> t.Optional[exp.Expression]: 4765 this = self._parse_id_var() 4766 4767 while self._match(TokenType.DOT): 4768 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4769 4770 if not self._match(TokenType.L_PAREN): 4771 return this 4772 4773 expressions = self._parse_csv(self._parse_function_parameter) 4774 self._match_r_paren() 4775 return self.expression( 4776 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4777 ) 4778 4779 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4780 literal = self._parse_primary() 4781 if literal: 4782 return self.expression(exp.Introducer, this=token.text, expression=literal) 4783 4784 return self.expression(exp.Identifier, this=token.text) 4785 4786 def _parse_session_parameter(self) -> exp.SessionParameter: 4787 kind = None 4788 this = self._parse_id_var() or self._parse_primary() 4789 4790 if this and self._match(TokenType.DOT): 4791 kind = this.name 4792 this = self._parse_var() or self._parse_primary() 4793 4794 return self.expression(exp.SessionParameter, this=this, kind=kind) 4795 4796 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4797 return self._parse_id_var() 4798 4799 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4800 index = self._index 4801 4802 if self._match(TokenType.L_PAREN): 4803 expressions = t.cast( 4804 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4805 ) 4806 4807 if not self._match(TokenType.R_PAREN): 4808 self._retreat(index) 4809 else: 4810 expressions = [self._parse_lambda_arg()] 4811 4812 if self._match_set(self.LAMBDAS): 4813 return self.LAMBDAS[self._prev.token_type](self, expressions) 4814 4815 self._retreat(index) 4816 4817 this: t.Optional[exp.Expression] 4818 4819 if self._match(TokenType.DISTINCT): 4820 this = self.expression( 4821 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4822 ) 4823 else: 4824 this = self._parse_select_or_expression(alias=alias) 4825 4826 return self._parse_limit( 4827 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4828 ) 4829 4830 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4831 index = self._index 4832 if not self._match(TokenType.L_PAREN): 4833 return this 4834 4835 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4836 # expr can be of both types 4837 if self._match_set(self.SELECT_START_TOKENS): 4838 self._retreat(index) 4839 return this 4840 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4841 self._match_r_paren() 4842 return self.expression(exp.Schema, this=this, expressions=args) 4843 4844 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4845 return self._parse_column_def(self._parse_field(any_token=True)) 4846 4847 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4848 # column defs are not really columns, they're identifiers 4849 if isinstance(this, exp.Column): 4850 this = this.this 4851 4852 kind = self._parse_types(schema=True) 4853 4854 if self._match_text_seq("FOR", "ORDINALITY"): 4855 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4856 4857 constraints: t.List[exp.Expression] = [] 4858 4859 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4860 ("ALIAS", "MATERIALIZED") 4861 ): 4862 persisted = self._prev.text.upper() == "MATERIALIZED" 4863 constraints.append( 4864 self.expression( 4865 exp.ComputedColumnConstraint, 4866 this=self._parse_conjunction(), 4867 persisted=persisted or self._match_text_seq("PERSISTED"), 4868 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4869 ) 4870 ) 4871 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4872 self._match(TokenType.ALIAS) 4873 constraints.append( 4874 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4875 ) 4876 4877 while True: 4878 constraint = self._parse_column_constraint() 4879 if not constraint: 4880 break 4881 constraints.append(constraint) 4882 4883 if not kind and not constraints: 4884 return this 4885 4886 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4887 4888 def _parse_auto_increment( 4889 self, 4890 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4891 start = None 4892 increment = None 4893 4894 if self._match(TokenType.L_PAREN, advance=False): 4895 args = self._parse_wrapped_csv(self._parse_bitwise) 4896 start = seq_get(args, 0) 4897 increment = seq_get(args, 1) 4898 elif self._match_text_seq("START"): 4899 start = self._parse_bitwise() 4900 self._match_text_seq("INCREMENT") 4901 increment = self._parse_bitwise() 4902 4903 if start and increment: 4904 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4905 4906 return exp.AutoIncrementColumnConstraint() 4907 4908 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4909 if not self._match_text_seq("REFRESH"): 4910 self._retreat(self._index - 1) 4911 return None 4912 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4913 4914 def _parse_compress(self) -> exp.CompressColumnConstraint: 4915 if self._match(TokenType.L_PAREN, advance=False): 4916 return self.expression( 4917 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4918 ) 4919 4920 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4921 4922 def _parse_generated_as_identity( 4923 self, 4924 ) -> ( 4925 exp.GeneratedAsIdentityColumnConstraint 4926 | exp.ComputedColumnConstraint 4927 | exp.GeneratedAsRowColumnConstraint 4928 ): 4929 if self._match_text_seq("BY", "DEFAULT"): 4930 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4931 this = self.expression( 4932 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4933 ) 4934 else: 4935 self._match_text_seq("ALWAYS") 4936 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4937 4938 self._match(TokenType.ALIAS) 4939 4940 if self._match_text_seq("ROW"): 4941 start = self._match_text_seq("START") 4942 if not start: 4943 self._match(TokenType.END) 4944 hidden = self._match_text_seq("HIDDEN") 4945 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4946 4947 identity = self._match_text_seq("IDENTITY") 4948 4949 if self._match(TokenType.L_PAREN): 4950 if self._match(TokenType.START_WITH): 4951 this.set("start", self._parse_bitwise()) 4952 if self._match_text_seq("INCREMENT", "BY"): 4953 this.set("increment", self._parse_bitwise()) 4954 if self._match_text_seq("MINVALUE"): 4955 this.set("minvalue", self._parse_bitwise()) 4956 if self._match_text_seq("MAXVALUE"): 4957 this.set("maxvalue", self._parse_bitwise()) 4958 4959 if self._match_text_seq("CYCLE"): 4960 this.set("cycle", True) 4961 elif self._match_text_seq("NO", "CYCLE"): 4962 this.set("cycle", False) 4963 4964 if not identity: 4965 this.set("expression", self._parse_range()) 4966 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4967 args = self._parse_csv(self._parse_bitwise) 4968 this.set("start", seq_get(args, 0)) 4969 this.set("increment", seq_get(args, 1)) 4970 4971 self._match_r_paren() 4972 4973 return this 4974 4975 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4976 self._match_text_seq("LENGTH") 4977 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4978 4979 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4980 if self._match_text_seq("NULL"): 4981 return self.expression(exp.NotNullColumnConstraint) 4982 if self._match_text_seq("CASESPECIFIC"): 4983 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4984 if self._match_text_seq("FOR", "REPLICATION"): 4985 return self.expression(exp.NotForReplicationColumnConstraint) 4986 return None 4987 4988 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4989 if self._match(TokenType.CONSTRAINT): 4990 this = self._parse_id_var() 4991 else: 4992 this = None 4993 4994 if self._match_texts(self.CONSTRAINT_PARSERS): 4995 return self.expression( 4996 exp.ColumnConstraint, 4997 this=this, 4998 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4999 ) 5000 5001 return this 5002 5003 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5004 if not self._match(TokenType.CONSTRAINT): 5005 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5006 5007 return self.expression( 5008 exp.Constraint, 5009 this=self._parse_id_var(), 5010 expressions=self._parse_unnamed_constraints(), 5011 ) 5012 5013 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5014 constraints = [] 5015 while True: 5016 constraint = self._parse_unnamed_constraint() or self._parse_function() 5017 if not constraint: 5018 break 5019 constraints.append(constraint) 5020 5021 return constraints 5022 5023 def _parse_unnamed_constraint( 5024 self, constraints: t.Optional[t.Collection[str]] = None 5025 ) -> t.Optional[exp.Expression]: 5026 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5027 constraints or self.CONSTRAINT_PARSERS 5028 ): 5029 return None 5030 5031 constraint = self._prev.text.upper() 5032 if constraint not in self.CONSTRAINT_PARSERS: 5033 
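            # Possible when a custom `constraints` collection passed to this method
            # includes a keyword that has no entry in CONSTRAINT_PARSERS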
self.raise_error(f"No parser found for schema constraint {constraint}.") 5034 5035 return self.CONSTRAINT_PARSERS[constraint](self) 5036 5037 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5038 self._match_text_seq("KEY") 5039 return self.expression( 5040 exp.UniqueColumnConstraint, 5041 this=self._parse_schema(self._parse_id_var(any_token=False)), 5042 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5043 on_conflict=self._parse_on_conflict(), 5044 ) 5045 5046 def _parse_key_constraint_options(self) -> t.List[str]: 5047 options = [] 5048 while True: 5049 if not self._curr: 5050 break 5051 5052 if self._match(TokenType.ON): 5053 action = None 5054 on = self._advance_any() and self._prev.text 5055 5056 if self._match_text_seq("NO", "ACTION"): 5057 action = "NO ACTION" 5058 elif self._match_text_seq("CASCADE"): 5059 action = "CASCADE" 5060 elif self._match_text_seq("RESTRICT"): 5061 action = "RESTRICT" 5062 elif self._match_pair(TokenType.SET, TokenType.NULL): 5063 action = "SET NULL" 5064 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5065 action = "SET DEFAULT" 5066 else: 5067 self.raise_error("Invalid key constraint") 5068 5069 options.append(f"ON {on} {action}") 5070 elif self._match_text_seq("NOT", "ENFORCED"): 5071 options.append("NOT ENFORCED") 5072 elif self._match_text_seq("DEFERRABLE"): 5073 options.append("DEFERRABLE") 5074 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5075 options.append("INITIALLY DEFERRED") 5076 elif self._match_text_seq("NORELY"): 5077 options.append("NORELY") 5078 elif self._match_text_seq("MATCH", "FULL"): 5079 options.append("MATCH FULL") 5080 else: 5081 break 5082 5083 return options 5084 5085 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5086 if match and not self._match(TokenType.REFERENCES): 5087 return None 5088 5089 expressions = None 5090 this = self._parse_table(schema=True) 5091 options = self._parse_key_constraint_options() 5092 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5093 5094 def _parse_foreign_key(self) -> exp.ForeignKey: 5095 expressions = self._parse_wrapped_id_vars() 5096 reference = self._parse_references() 5097 options = {} 5098 5099 while self._match(TokenType.ON): 5100 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5101 self.raise_error("Expected DELETE or UPDATE") 5102 5103 kind = self._prev.text.lower() 5104 5105 if self._match_text_seq("NO", "ACTION"): 5106 action = "NO ACTION" 5107 elif self._match(TokenType.SET): 5108 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5109 action = "SET " + self._prev.text.upper() 5110 else: 5111 self._advance() 5112 action = self._prev.text.upper() 5113 5114 options[kind] = action 5115 5116 return self.expression( 5117 exp.ForeignKey, 5118 expressions=expressions, 5119 reference=reference, 5120 **options, # type: ignore 5121 ) 5122 5123 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5124 return self._parse_field() 5125 5126 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5127 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5128 self._retreat(self._index - 1) 5129 return None 5130 5131 id_vars = self._parse_wrapped_id_vars() 5132 return self.expression( 5133 exp.PeriodForSystemTimeConstraint, 5134 this=seq_get(id_vars, 0), 5135 expression=seq_get(id_vars, 1), 5136 ) 5137 5138 def _parse_primary_key( 5139 self, wrapped_optional: bool = False, in_props: bool = False 5140 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5141 desc = ( 5142 self._match_set((TokenType.ASC, TokenType.DESC)) 5143 and self._prev.token_type == TokenType.DESC 5144 ) 5145 5146 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5147 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5148 5149 expressions = self._parse_wrapped_csv( 5150 self._parse_primary_key_part, optional=wrapped_optional 5151 ) 5152 options = self._parse_key_constraint_options() 5153 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5154 5155 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5156 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5157 5158 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5159 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5160 return this 5161 5162 bracket_kind = self._prev.token_type 5163 expressions = self._parse_csv( 5164 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5165 ) 5166 5167 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5168 self.raise_error("Expected ]") 5169 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5170 self.raise_error("Expected }") 5171 5172 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5173 if bracket_kind == TokenType.L_BRACE: 5174 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5175 elif not this or this.name.upper() == "ARRAY": 5176 this = self.expression(exp.Array, expressions=expressions) 5177 else: 5178 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5179 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5180 5181 self._add_comments(this) 5182 return self._parse_bracket(this) 5183 5184 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5185 if self._match(TokenType.COLON): 5186 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5187 return this 5188 5189 def _parse_case(self) -> t.Optional[exp.Expression]: 5190 ifs = [] 5191 default = None 5192 5193 comments = self._prev_comments 5194 expression = self._parse_conjunction() 5195 5196 while self._match(TokenType.WHEN): 5197 this = self._parse_conjunction() 5198 self._match(TokenType.THEN) 5199 then = self._parse_conjunction() 5200 ifs.append(self.expression(exp.If, this=this, true=then)) 5201 5202 if self._match(TokenType.ELSE): 5203 default = self._parse_conjunction() 5204 5205 if not self._match(TokenType.END): 5206 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5207 default = exp.column("interval") 5208 else: 5209 self.raise_error("Expected END after CASE", self._prev) 5210 5211 return self.expression( 5212 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5213 ) 5214 5215 def _parse_if(self) -> t.Optional[exp.Expression]: 5216 if self._match(TokenType.L_PAREN): 5217 args = self._parse_csv(self._parse_conjunction) 5218 this = self.validate_expression(exp.If.from_arg_list(args), args) 5219 self._match_r_paren() 5220 else: 5221 index = self._index - 1 5222 5223 if self.NO_PAREN_IF_COMMANDS and index == 0: 5224 return self._parse_as_command(self._prev) 5225 5226 condition = self._parse_conjunction() 5227 5228 if not condition: 5229 self._retreat(index) 5230 return None 5231 
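            # Unparenthesized variant: IF <condition> THEN <true> [ELSE <false>] END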
5232 self._match(TokenType.THEN) 5233 true = self._parse_conjunction() 5234 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5235 self._match(TokenType.END) 5236 this = self.expression(exp.If, this=condition, true=true, false=false) 5237 5238 return this 5239 5240 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5241 if not self._match_text_seq("VALUE", "FOR"): 5242 self._retreat(self._index - 1) 5243 return None 5244 5245 return self.expression( 5246 exp.NextValueFor, 5247 this=self._parse_column(), 5248 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5249 ) 5250 5251 def _parse_extract(self) -> exp.Extract: 5252 this = self._parse_function() or self._parse_var() or self._parse_type() 5253 5254 if self._match(TokenType.FROM): 5255 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5256 5257 if not self._match(TokenType.COMMA): 5258 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5259 5260 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5261 5262 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5263 this = self._parse_conjunction() 5264 5265 if not self._match(TokenType.ALIAS): 5266 if self._match(TokenType.COMMA): 5267 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5268 5269 self.raise_error("Expected AS after CAST") 5270 5271 fmt = None 5272 to = self._parse_types() 5273 5274 if self._match(TokenType.FORMAT): 5275 fmt_string = self._parse_string() 5276 fmt = self._parse_at_time_zone(fmt_string) 5277 5278 if not to: 5279 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5280 if to.this in exp.DataType.TEMPORAL_TYPES: 5281 this = self.expression( 5282 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5283 this=this, 5284 format=exp.Literal.string( 5285 format_time( 5286 fmt_string.this if fmt_string else "", 5287 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5288 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5289 ) 5290 ), 5291 ) 5292 5293 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5294 this.set("zone", fmt.args["zone"]) 5295 return this 5296 elif not to: 5297 self.raise_error("Expected TYPE after CAST") 5298 elif isinstance(to, exp.Identifier): 5299 to = exp.DataType.build(to.name, udt=True) 5300 elif to.this == exp.DataType.Type.CHAR: 5301 if self._match(TokenType.CHARACTER_SET): 5302 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5303 5304 return self.expression( 5305 exp.Cast if strict else exp.TryCast, 5306 this=this, 5307 to=to, 5308 format=fmt, 5309 safe=safe, 5310 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5311 ) 5312 5313 def _parse_string_agg(self) -> exp.Expression: 5314 if self._match(TokenType.DISTINCT): 5315 args: t.List[t.Optional[exp.Expression]] = [ 5316 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5317 ] 5318 if self._match(TokenType.COMMA): 5319 args.extend(self._parse_csv(self._parse_conjunction)) 5320 else: 5321 args = self._parse_csv(self._parse_conjunction) # type: ignore 5322 5323 index = self._index 5324 if not self._match(TokenType.R_PAREN) and args: 5325 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5326 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5327 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5328 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5329 5330 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5331 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5332 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5333 if not self._match_text_seq("WITHIN", "GROUP"): 5334 self._retreat(index) 5335 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5336 5337 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5338 order = self._parse_order(this=seq_get(args, 0)) 5339 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5340 5341 def _parse_convert( 5342 self, strict: bool, safe: t.Optional[bool] = None 5343 ) -> t.Optional[exp.Expression]: 5344 this = self._parse_bitwise() 5345 5346 if self._match(TokenType.USING): 5347 to: t.Optional[exp.Expression] = self.expression( 5348 exp.CharacterSet, this=self._parse_var() 5349 ) 5350 elif self._match(TokenType.COMMA): 5351 to = self._parse_types() 5352 else: 5353 to = None 5354 5355 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5356 5357 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5358 """ 5359 There are generally two variants of the DECODE function: 5360 5361 - DECODE(bin, charset) 5362 - DECODE(expression, search, result [, search, result] ... [, default]) 5363 5364 The second variant will always be parsed into a CASE expression. Note that NULL 5365 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5366 instead of relying on pattern matching. 
5367 """ 5368 args = self._parse_csv(self._parse_conjunction) 5369 5370 if len(args) < 3: 5371 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5372 5373 expression, *expressions = args 5374 if not expression: 5375 return None 5376 5377 ifs = [] 5378 for search, result in zip(expressions[::2], expressions[1::2]): 5379 if not search or not result: 5380 return None 5381 5382 if isinstance(search, exp.Literal): 5383 ifs.append( 5384 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5385 ) 5386 elif isinstance(search, exp.Null): 5387 ifs.append( 5388 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5389 ) 5390 else: 5391 cond = exp.or_( 5392 exp.EQ(this=expression.copy(), expression=search), 5393 exp.and_( 5394 exp.Is(this=expression.copy(), expression=exp.Null()), 5395 exp.Is(this=search.copy(), expression=exp.Null()), 5396 copy=False, 5397 ), 5398 copy=False, 5399 ) 5400 ifs.append(exp.If(this=cond, true=result)) 5401 5402 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5403 5404 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5405 self._match_text_seq("KEY") 5406 key = self._parse_column() 5407 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5408 self._match_text_seq("VALUE") 5409 value = self._parse_bitwise() 5410 5411 if not key and not value: 5412 return None 5413 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5414 5415 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5416 if not this or not self._match_text_seq("FORMAT", "JSON"): 5417 return this 5418 5419 return self.expression(exp.FormatJson, this=this) 5420 5421 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5422 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5423 for value in values: 5424 if self._match_text_seq(value, "ON", on): 5425 return f"{value} ON {on}" 5426 5427 return None 5428 5429 @t.overload 5430 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5431 5432 @t.overload 5433 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5434 5435 def _parse_json_object(self, agg=False): 5436 star = self._parse_star() 5437 expressions = ( 5438 [star] 5439 if star 5440 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5441 ) 5442 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5443 5444 unique_keys = None 5445 if self._match_text_seq("WITH", "UNIQUE"): 5446 unique_keys = True 5447 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5448 unique_keys = False 5449 5450 self._match_text_seq("KEYS") 5451 5452 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5453 self._parse_type() 5454 ) 5455 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5456 5457 return self.expression( 5458 exp.JSONObjectAgg if agg else exp.JSONObject, 5459 expressions=expressions, 5460 null_handling=null_handling, 5461 unique_keys=unique_keys, 5462 return_type=return_type, 5463 encoding=encoding, 5464 ) 5465 5466 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5467 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5468 if not self._match_text_seq("NESTED"): 5469 this = self._parse_id_var() 5470 kind = self._parse_types(allow_identifiers=False) 5471 nested = None 5472 else: 5473 this = None 5474 kind = None 5475 nested = True 5476 5477 path = self._match_text_seq("PATH") and self._parse_string() 5478 nested_schema = nested and self._parse_json_schema() 5479 5480 return self.expression( 5481 exp.JSONColumnDef, 5482 this=this, 5483 kind=kind, 5484 path=path, 5485 nested_schema=nested_schema, 5486 ) 5487 5488 def _parse_json_schema(self) -> exp.JSONSchema: 5489 self._match_text_seq("COLUMNS") 5490 return self.expression( 5491 exp.JSONSchema, 5492 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5493 ) 5494 5495 def _parse_json_table(self) -> exp.JSONTable: 5496 this = self._parse_format_json(self._parse_bitwise()) 5497 path = self._match(TokenType.COMMA) and self._parse_string() 5498 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5499 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5500 schema = self._parse_json_schema() 5501 5502 return exp.JSONTable( 5503 this=this, 5504 schema=schema, 5505 path=path, 5506 error_handling=error_handling, 5507 empty_handling=empty_handling, 5508 ) 5509 5510 def _parse_match_against(self) -> exp.MatchAgainst: 5511 expressions = self._parse_csv(self._parse_column) 5512 5513 self._match_text_seq(")", "AGAINST", "(") 5514 5515 this = self._parse_string() 5516 5517 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5518 modifier = "IN NATURAL LANGUAGE MODE" 5519 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5520 modifier = f"{modifier} WITH QUERY EXPANSION" 5521 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5522 modifier = "IN BOOLEAN MODE" 5523 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5524 modifier = "WITH QUERY EXPANSION" 5525 else: 5526 modifier = None 5527 5528 return self.expression( 5529 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5530 ) 5531 5532 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5533 def _parse_open_json(self) -> exp.OpenJSON: 5534 this = self._parse_bitwise() 5535 path = self._match(TokenType.COMMA) and self._parse_string() 5536 5537 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5538 this = self._parse_field(any_token=True) 5539 kind = self._parse_types() 5540 path = 
self._parse_string() 5541 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5542 5543 return self.expression( 5544 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5545 ) 5546 5547 expressions = None 5548 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5549 self._match_l_paren() 5550 expressions = self._parse_csv(_parse_open_json_column_def) 5551 5552 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5553 5554 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5555 args = self._parse_csv(self._parse_bitwise) 5556 5557 if self._match(TokenType.IN): 5558 return self.expression( 5559 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5560 ) 5561 5562 if haystack_first: 5563 haystack = seq_get(args, 0) 5564 needle = seq_get(args, 1) 5565 else: 5566 needle = seq_get(args, 0) 5567 haystack = seq_get(args, 1) 5568 5569 return self.expression( 5570 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5571 ) 5572 5573 def _parse_predict(self) -> exp.Predict: 5574 self._match_text_seq("MODEL") 5575 this = self._parse_table() 5576 5577 self._match(TokenType.COMMA) 5578 self._match_text_seq("TABLE") 5579 5580 return self.expression( 5581 exp.Predict, 5582 this=this, 5583 expression=self._parse_table(), 5584 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5585 ) 5586 5587 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5588 args = self._parse_csv(self._parse_table) 5589 return exp.JoinHint(this=func_name.upper(), expressions=args) 5590 5591 def _parse_substring(self) -> exp.Substring: 5592 # Postgres supports the form: substring(string [from int] [for int]) 5593 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5594 5595 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5596 5597 if self._match(TokenType.FROM): 5598 args.append(self._parse_bitwise()) 5599 if self._match(TokenType.FOR): 5600 if len(args) == 1: 5601 args.append(exp.Literal.number(1)) 5602 args.append(self._parse_bitwise()) 5603 5604 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5605 5606 def _parse_trim(self) -> exp.Trim: 5607 # https://www.w3resource.com/sql/character-functions/trim.php 5608 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5609 5610 position = None 5611 collation = None 5612 expression = None 5613 5614 if self._match_texts(self.TRIM_TYPES): 5615 position = self._prev.text.upper() 5616 5617 this = self._parse_bitwise() 5618 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5619 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5620 expression = self._parse_bitwise() 5621 5622 if invert_order: 5623 this, expression = expression, this 5624 5625 if self._match(TokenType.COLLATE): 5626 collation = self._parse_bitwise() 5627 5628 return self.expression( 5629 exp.Trim, this=this, position=position, expression=expression, collation=collation 5630 ) 5631 5632 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5633 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5634 5635 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5636 return self._parse_window(self._parse_id_var(), alias=True) 5637 5638 def _parse_respect_or_ignore_nulls( 5639 self, this: t.Optional[exp.Expression] 5640 ) -> t.Optional[exp.Expression]: 5641 if self._match_text_seq("IGNORE", "NULLS"): 
5642 return self.expression(exp.IgnoreNulls, this=this) 5643 if self._match_text_seq("RESPECT", "NULLS"): 5644 return self.expression(exp.RespectNulls, this=this) 5645 return this 5646 5647 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5648 if self._match(TokenType.HAVING): 5649 self._match_texts(("MAX", "MIN")) 5650 max = self._prev.text.upper() != "MIN" 5651 return self.expression( 5652 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5653 ) 5654 5655 return this 5656 5657 def _parse_window( 5658 self, this: t.Optional[exp.Expression], alias: bool = False 5659 ) -> t.Optional[exp.Expression]: 5660 func = this 5661 comments = func.comments if isinstance(func, exp.Expression) else None 5662 5663 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5664 self._match(TokenType.WHERE) 5665 this = self.expression( 5666 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5667 ) 5668 self._match_r_paren() 5669 5670 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5671 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5672 if self._match_text_seq("WITHIN", "GROUP"): 5673 order = self._parse_wrapped(self._parse_order) 5674 this = self.expression(exp.WithinGroup, this=this, expression=order) 5675 5676 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5677 # Some dialects choose to implement and some do not. 5678 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5679 5680 # There is some code above in _parse_lambda that handles 5681 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5682 5683 # The below changes handle 5684 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5685 5686 # Oracle allows both formats 5687 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5688 # and Snowflake chose to do the same for familiarity 5689 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5690 if isinstance(this, exp.AggFunc): 5691 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5692 5693 if ignore_respect and ignore_respect is not this: 5694 ignore_respect.replace(ignore_respect.this) 5695 this = self.expression(ignore_respect.__class__, this=this) 5696 5697 this = self._parse_respect_or_ignore_nulls(this) 5698 5699 # bigquery select from window x AS (partition by ...) 
5700 if alias: 5701 over = None 5702 self._match(TokenType.ALIAS) 5703 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5704 return this 5705 else: 5706 over = self._prev.text.upper() 5707 5708 if comments and isinstance(func, exp.Expression): 5709 func.pop_comments() 5710 5711 if not self._match(TokenType.L_PAREN): 5712 return self.expression( 5713 exp.Window, 5714 comments=comments, 5715 this=this, 5716 alias=self._parse_id_var(False), 5717 over=over, 5718 ) 5719 5720 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5721 5722 first = self._match(TokenType.FIRST) 5723 if self._match_text_seq("LAST"): 5724 first = False 5725 5726 partition, order = self._parse_partition_and_order() 5727 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5728 5729 if kind: 5730 self._match(TokenType.BETWEEN) 5731 start = self._parse_window_spec() 5732 self._match(TokenType.AND) 5733 end = self._parse_window_spec() 5734 5735 spec = self.expression( 5736 exp.WindowSpec, 5737 kind=kind, 5738 start=start["value"], 5739 start_side=start["side"], 5740 end=end["value"], 5741 end_side=end["side"], 5742 ) 5743 else: 5744 spec = None 5745 5746 self._match_r_paren() 5747 5748 window = self.expression( 5749 exp.Window, 5750 comments=comments, 5751 this=this, 5752 partition_by=partition, 5753 order=order, 5754 spec=spec, 5755 alias=window_alias, 5756 over=over, 5757 first=first, 5758 ) 5759 5760 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5761 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5762 return self._parse_window(window, alias=alias) 5763 5764 return window 5765 5766 def _parse_partition_and_order( 5767 self, 5768 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5769 return self._parse_partition_by(), self._parse_order() 5770 5771 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5772 self._match(TokenType.BETWEEN) 5773 5774 return { 5775 "value": ( 5776 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5777 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5778 or self._parse_bitwise() 5779 ), 5780 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5781 } 5782 5783 def _parse_alias( 5784 self, this: t.Optional[exp.Expression], explicit: bool = False 5785 ) -> t.Optional[exp.Expression]: 5786 any_token = self._match(TokenType.ALIAS) 5787 comments = self._prev_comments or [] 5788 5789 if explicit and not any_token: 5790 return this 5791 5792 if self._match(TokenType.L_PAREN): 5793 aliases = self.expression( 5794 exp.Aliases, 5795 comments=comments, 5796 this=this, 5797 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5798 ) 5799 self._match_r_paren(aliases) 5800 return aliases 5801 5802 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5803 self.STRING_ALIASES and self._parse_string_as_identifier() 5804 ) 5805 5806 if alias: 5807 comments.extend(alias.pop_comments()) 5808 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5809 column = this.this 5810 5811 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5812 if not this.comments and column and column.comments: 5813 this.comments = column.pop_comments() 5814 5815 return this 5816 5817 def _parse_id_var( 5818 self, 5819 any_token: bool = True, 5820 tokens: t.Optional[t.Collection[TokenType]] = None, 5821 ) -> t.Optional[exp.Expression]: 5822 expression = self._parse_identifier() 5823 if 
not expression and ( 5824 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5825 ): 5826 quoted = self._prev.token_type == TokenType.STRING 5827 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5828 5829 return expression 5830 5831 def _parse_string(self) -> t.Optional[exp.Expression]: 5832 if self._match_set(self.STRING_PARSERS): 5833 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5834 return self._parse_placeholder() 5835 5836 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5837 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5838 5839 def _parse_number(self) -> t.Optional[exp.Expression]: 5840 if self._match_set(self.NUMERIC_PARSERS): 5841 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5842 return self._parse_placeholder() 5843 5844 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5845 if self._match(TokenType.IDENTIFIER): 5846 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5847 return self._parse_placeholder() 5848 5849 def _parse_var( 5850 self, 5851 any_token: bool = False, 5852 tokens: t.Optional[t.Collection[TokenType]] = None, 5853 upper: bool = False, 5854 ) -> t.Optional[exp.Expression]: 5855 if ( 5856 (any_token and self._advance_any()) 5857 or self._match(TokenType.VAR) 5858 or (self._match_set(tokens) if tokens else False) 5859 ): 5860 return self.expression( 5861 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5862 ) 5863 return self._parse_placeholder() 5864 5865 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5866 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5867 self._advance() 5868 return self._prev 5869 return None 5870 5871 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5872 return self._parse_var() or self._parse_string() 5873 5874 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5875 return self._parse_primary() or self._parse_var(any_token=True) 5876 5877 def _parse_null(self) -> t.Optional[exp.Expression]: 5878 if self._match_set(self.NULL_TOKENS): 5879 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5880 return self._parse_placeholder() 5881 5882 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5883 if self._match(TokenType.TRUE): 5884 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5885 if self._match(TokenType.FALSE): 5886 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5887 return self._parse_placeholder() 5888 5889 def _parse_star(self) -> t.Optional[exp.Expression]: 5890 if self._match(TokenType.STAR): 5891 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5892 return self._parse_placeholder() 5893 5894 def _parse_parameter(self) -> exp.Parameter: 5895 this = self._parse_identifier() or self._parse_primary_or_var() 5896 return self.expression(exp.Parameter, this=this) 5897 5898 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5899 if self._match_set(self.PLACEHOLDER_PARSERS): 5900 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5901 if placeholder: 5902 return placeholder 5903 self._advance(-1) 5904 return None 5905 5906 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5907 if not self._match_texts(keywords): 5908 return None 5909 if self._match(TokenType.L_PAREN, advance=False): 5910 return 
self._parse_wrapped_csv(self._parse_expression) 5911 5912 expression = self._parse_expression() 5913 return [expression] if expression else None 5914 5915 def _parse_csv( 5916 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5917 ) -> t.List[exp.Expression]: 5918 parse_result = parse_method() 5919 items = [parse_result] if parse_result is not None else [] 5920 5921 while self._match(sep): 5922 self._add_comments(parse_result) 5923 parse_result = parse_method() 5924 if parse_result is not None: 5925 items.append(parse_result) 5926 5927 return items 5928 5929 def _parse_tokens( 5930 self, parse_method: t.Callable, expressions: t.Dict 5931 ) -> t.Optional[exp.Expression]: 5932 this = parse_method() 5933 5934 while self._match_set(expressions): 5935 this = self.expression( 5936 expressions[self._prev.token_type], 5937 this=this, 5938 comments=self._prev_comments, 5939 expression=parse_method(), 5940 ) 5941 5942 return this 5943 5944 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5945 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5946 5947 def _parse_wrapped_csv( 5948 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5949 ) -> t.List[exp.Expression]: 5950 return self._parse_wrapped( 5951 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5952 ) 5953 5954 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5955 wrapped = self._match(TokenType.L_PAREN) 5956 if not wrapped and not optional: 5957 self.raise_error("Expecting (") 5958 parse_result = parse_method() 5959 if wrapped: 5960 self._match_r_paren() 5961 return parse_result 5962 5963 def _parse_expressions(self) -> t.List[exp.Expression]: 5964 return self._parse_csv(self._parse_expression) 5965 5966 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5967 return self._parse_select() or self._parse_set_operations( 5968 self._parse_expression() if alias else self._parse_conjunction() 5969 ) 5970 5971 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5972 return self._parse_query_modifiers( 5973 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5974 ) 5975 5976 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5977 this = None 5978 if self._match_texts(self.TRANSACTION_KIND): 5979 this = self._prev.text 5980 5981 self._match_texts(("TRANSACTION", "WORK")) 5982 5983 modes = [] 5984 while True: 5985 mode = [] 5986 while self._match(TokenType.VAR): 5987 mode.append(self._prev.text) 5988 5989 if mode: 5990 modes.append(" ".join(mode)) 5991 if not self._match(TokenType.COMMA): 5992 break 5993 5994 return self.expression(exp.Transaction, this=this, modes=modes) 5995 5996 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5997 chain = None 5998 savepoint = None 5999 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6000 6001 self._match_texts(("TRANSACTION", "WORK")) 6002 6003 if self._match_text_seq("TO"): 6004 self._match_text_seq("SAVEPOINT") 6005 savepoint = self._parse_id_var() 6006 6007 if self._match(TokenType.AND): 6008 chain = not self._match_text_seq("NO") 6009 self._match_text_seq("CHAIN") 6010 6011 if is_rollback: 6012 return self.expression(exp.Rollback, savepoint=savepoint) 6013 6014 return self.expression(exp.Commit, chain=chain) 6015 6016 def _parse_refresh(self) -> exp.Refresh: 6017 self._match(TokenType.TABLE) 6018 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6019 6020 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6021 if not self._match_text_seq("ADD"): 6022 return None 6023 6024 self._match(TokenType.COLUMN) 6025 exists_column = self._parse_exists(not_=True) 6026 expression = self._parse_field_def() 6027 6028 if expression: 6029 expression.set("exists", exists_column) 6030 6031 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6032 if self._match_texts(("FIRST", "AFTER")): 6033 position = self._prev.text 6034 column_position = self.expression( 6035 exp.ColumnPosition, this=self._parse_column(), position=position 6036 ) 6037 expression.set("position", column_position) 6038 6039 return expression 6040 6041 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6042 drop = self._match(TokenType.DROP) and self._parse_drop() 6043 if drop and not isinstance(drop, exp.Command): 6044 drop.set("kind", drop.args.get("kind", "COLUMN")) 6045 return drop 6046 6047 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6048 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6049 return self.expression( 6050 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6051 ) 6052 6053 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6054 index = self._index - 1 6055 6056 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6057 return self._parse_csv( 6058 lambda: self.expression( 6059 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6060 ) 6061 ) 6062 6063 self._retreat(index) 6064 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6065 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6066 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6067 6068 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6069 if self._match_texts(self.ALTER_ALTER_PARSERS): 6070 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6071 6072 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6073 # keyword after ALTER we default to parsing this statement 6074 self._match(TokenType.COLUMN) 6075 column = self._parse_field(any_token=True) 6076 6077 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6078 return self.expression(exp.AlterColumn, this=column, drop=True) 6079 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6080 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6081 if self._match(TokenType.COMMENT): 6082 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6083 if self._match_text_seq("DROP", "NOT", "NULL"): 6084 return self.expression( 6085 exp.AlterColumn, 6086 this=column, 6087 drop=True, 6088 allow_null=True, 6089 ) 6090 if self._match_text_seq("SET", "NOT", "NULL"): 6091 return self.expression( 6092 exp.AlterColumn, 6093 this=column, 6094 allow_null=False, 6095 ) 6096 self._match_text_seq("SET", "DATA") 6097 self._match_text_seq("TYPE") 6098 return self.expression( 6099 exp.AlterColumn, 6100 this=column, 6101 dtype=self._parse_types(), 6102 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6103 using=self._match(TokenType.USING) and self._parse_conjunction(), 6104 ) 6105 6106 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6107 if self._match_texts(("ALL", "EVEN", "AUTO")): 6108 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6109 6110 self._match_text_seq("KEY", "DISTKEY") 6111 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6112 6113 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6114 if compound: 6115 self._match_text_seq("SORTKEY") 6116 6117 if self._match(TokenType.L_PAREN, advance=False): 6118 return self.expression( 6119 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6120 ) 6121 6122 self._match_texts(("AUTO", "NONE")) 6123 return self.expression( 6124 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6125 ) 6126 6127 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6128 index = self._index - 1 6129 6130 partition_exists = self._parse_exists() 6131 if self._match(TokenType.PARTITION, advance=False): 6132 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6133 6134 self._retreat(index) 6135 return self._parse_csv(self._parse_drop_column) 6136 6137 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6138 if self._match(TokenType.COLUMN): 6139 exists = self._parse_exists() 6140 old_column = self._parse_column() 6141 to = self._match_text_seq("TO") 6142 new_column = self._parse_column() 6143 6144 if old_column is None or to is None or new_column is None: 6145 return None 6146 6147 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6148 6149 self._match_text_seq("TO") 6150 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6151 6152 def _parse_alter_table_set(self) -> exp.AlterSet: 6153 alter_set = self.expression(exp.AlterSet) 6154 6155 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6156 "TABLE", "PROPERTIES" 6157 ): 6158 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6159 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6160 alter_set.set("expressions", [self._parse_conjunction()]) 6161 elif self._match_texts(("LOGGED", "UNLOGGED")): 6162 alter_set.set("option", exp.var(self._prev.text.upper())) 6163 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6164 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6165 elif self._match_text_seq("LOCATION"): 6166 alter_set.set("location", self._parse_field()) 6167 elif self._match_text_seq("ACCESS", "METHOD"): 6168 alter_set.set("access_method", self._parse_field()) 6169 elif self._match_text_seq("TABLESPACE"): 6170 alter_set.set("tablespace", self._parse_field()) 6171 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6172 alter_set.set("file_format", [self._parse_field()]) 6173 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6174 alter_set.set("file_format", self._parse_wrapped_options()) 6175 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6176 alter_set.set("copy_options", self._parse_wrapped_options()) 6177 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6178 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6179 else: 6180 if self._match_text_seq("SERDE"): 6181 alter_set.set("serde", self._parse_field()) 6182 6183 alter_set.set("expressions", [self._parse_properties()]) 6184 6185 return alter_set 6186 6187 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6188 start = self._prev 6189 6190 if not self._match(TokenType.TABLE): 6191 return 
self._parse_as_command(start) 6192 6193 exists = self._parse_exists() 6194 only = self._match_text_seq("ONLY") 6195 this = self._parse_table(schema=True) 6196 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6197 6198 if self._next: 6199 self._advance() 6200 6201 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6202 if parser: 6203 actions = ensure_list(parser(self)) 6204 options = self._parse_csv(self._parse_property) 6205 6206 if not self._curr and actions: 6207 return self.expression( 6208 exp.AlterTable, 6209 this=this, 6210 exists=exists, 6211 actions=actions, 6212 only=only, 6213 options=options, 6214 cluster=cluster, 6215 ) 6216 6217 return self._parse_as_command(start) 6218 6219 def _parse_merge(self) -> exp.Merge: 6220 self._match(TokenType.INTO) 6221 target = self._parse_table() 6222 6223 if target and self._match(TokenType.ALIAS, advance=False): 6224 target.set("alias", self._parse_table_alias()) 6225 6226 self._match(TokenType.USING) 6227 using = self._parse_table() 6228 6229 self._match(TokenType.ON) 6230 on = self._parse_conjunction() 6231 6232 return self.expression( 6233 exp.Merge, 6234 this=target, 6235 using=using, 6236 on=on, 6237 expressions=self._parse_when_matched(), 6238 ) 6239 6240 def _parse_when_matched(self) -> t.List[exp.When]: 6241 whens = [] 6242 6243 while self._match(TokenType.WHEN): 6244 matched = not self._match(TokenType.NOT) 6245 self._match_text_seq("MATCHED") 6246 source = ( 6247 False 6248 if self._match_text_seq("BY", "TARGET") 6249 else self._match_text_seq("BY", "SOURCE") 6250 ) 6251 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6252 6253 self._match(TokenType.THEN) 6254 6255 if self._match(TokenType.INSERT): 6256 _this = self._parse_star() 6257 if _this: 6258 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6259 else: 6260 then = self.expression( 6261 exp.Insert, 6262 this=self._parse_value(), 6263 expression=self._match_text_seq("VALUES") and self._parse_value(), 6264 ) 6265 elif self._match(TokenType.UPDATE): 6266 expressions = self._parse_star() 6267 if expressions: 6268 then = self.expression(exp.Update, expressions=expressions) 6269 else: 6270 then = self.expression( 6271 exp.Update, 6272 expressions=self._match(TokenType.SET) 6273 and self._parse_csv(self._parse_equality), 6274 ) 6275 elif self._match(TokenType.DELETE): 6276 then = self.expression(exp.Var, this=self._prev.text) 6277 else: 6278 then = None 6279 6280 whens.append( 6281 self.expression( 6282 exp.When, 6283 matched=matched, 6284 source=source, 6285 condition=condition, 6286 then=then, 6287 ) 6288 ) 6289 return whens 6290 6291 def _parse_show(self) -> t.Optional[exp.Expression]: 6292 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6293 if parser: 6294 return parser(self) 6295 return self._parse_as_command(self._prev) 6296 6297 def _parse_set_item_assignment( 6298 self, kind: t.Optional[str] = None 6299 ) -> t.Optional[exp.Expression]: 6300 index = self._index 6301 6302 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6303 return self._parse_set_transaction(global_=kind == "GLOBAL") 6304 6305 left = self._parse_primary() or self._parse_column() 6306 assignment_delimiter = self._match_texts(("=", "TO")) 6307 6308 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6309 self._retreat(index) 6310 return None 6311 6312 right = self._parse_statement() or self._parse_id_var() 6313 if isinstance(right, 
(exp.Column, exp.Identifier)): 6314 right = exp.var(right.name) 6315 6316 this = self.expression(exp.EQ, this=left, expression=right) 6317 return self.expression(exp.SetItem, this=this, kind=kind) 6318 6319 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6320 self._match_text_seq("TRANSACTION") 6321 characteristics = self._parse_csv( 6322 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6323 ) 6324 return self.expression( 6325 exp.SetItem, 6326 expressions=characteristics, 6327 kind="TRANSACTION", 6328 **{"global": global_}, # type: ignore 6329 ) 6330 6331 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6332 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6333 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6334 6335 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6336 index = self._index 6337 set_ = self.expression( 6338 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6339 ) 6340 6341 if self._curr: 6342 self._retreat(index) 6343 return self._parse_as_command(self._prev) 6344 6345 return set_ 6346 6347 def _parse_var_from_options( 6348 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6349 ) -> t.Optional[exp.Var]: 6350 start = self._curr 6351 if not start: 6352 return None 6353 6354 option = start.text.upper() 6355 continuations = options.get(option) 6356 6357 index = self._index 6358 self._advance() 6359 for keywords in continuations or []: 6360 if isinstance(keywords, str): 6361 keywords = (keywords,) 6362 6363 if self._match_text_seq(*keywords): 6364 option = f"{option} {' '.join(keywords)}" 6365 break 6366 else: 6367 if continuations or continuations is None: 6368 if raise_unmatched: 6369 self.raise_error(f"Unknown option {option}") 6370 6371 self._retreat(index) 6372 return None 6373 6374 return exp.var(option) 6375 6376 def _parse_as_command(self, start: Token) -> exp.Command: 6377 while self._curr: 6378 self._advance() 6379 text = self._find_sql(start, self._prev) 6380 size = len(start.text) 6381 self._warn_unsupported() 6382 return exp.Command(this=text[:size], expression=text[size:]) 6383 6384 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6385 settings = [] 6386 6387 self._match_l_paren() 6388 kind = self._parse_id_var() 6389 6390 if self._match(TokenType.L_PAREN): 6391 while True: 6392 key = self._parse_id_var() 6393 value = self._parse_primary() 6394 6395 if not key and value is None: 6396 break 6397 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6398 self._match(TokenType.R_PAREN) 6399 6400 self._match_r_paren() 6401 6402 return self.expression( 6403 exp.DictProperty, 6404 this=this, 6405 kind=kind.this if kind else None, 6406 settings=settings, 6407 ) 6408 6409 def _parse_dict_range(self, this: str) -> exp.DictRange: 6410 self._match_l_paren() 6411 has_min = self._match_text_seq("MIN") 6412 if has_min: 6413 min = self._parse_var() or self._parse_primary() 6414 self._match_text_seq("MAX") 6415 max = self._parse_var() or self._parse_primary() 6416 else: 6417 max = self._parse_var() or self._parse_primary() 6418 min = exp.Literal.number(0) 6419 self._match_r_paren() 6420 return self.expression(exp.DictRange, this=this, min=min, max=max) 6421 6422 def _parse_comprehension( 6423 self, this: t.Optional[exp.Expression] 6424 ) -> t.Optional[exp.Comprehension]: 6425 index = self._index 6426 expression = self._parse_column() 6427 if not 
self._match(TokenType.IN): 6428 self._retreat(index - 1) 6429 return None 6430 iterator = self._parse_column() 6431 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6432 return self.expression( 6433 exp.Comprehension, 6434 this=this, 6435 expression=expression, 6436 iterator=iterator, 6437 condition=condition, 6438 ) 6439 6440 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6441 if self._match(TokenType.HEREDOC_STRING): 6442 return self.expression(exp.Heredoc, this=self._prev.text) 6443 6444 if not self._match_text_seq("$"): 6445 return None 6446 6447 tags = ["$"] 6448 tag_text = None 6449 6450 if self._is_connected(): 6451 self._advance() 6452 tags.append(self._prev.text.upper()) 6453 else: 6454 self.raise_error("No closing $ found") 6455 6456 if tags[-1] != "$": 6457 if self._is_connected() and self._match_text_seq("$"): 6458 tag_text = tags[-1] 6459 tags.append("$") 6460 else: 6461 self.raise_error("No closing $ found") 6462 6463 heredoc_start = self._curr 6464 6465 while self._curr: 6466 if self._match_text_seq(*tags, advance=False): 6467 this = self._find_sql(heredoc_start, self._prev) 6468 self._advance(len(tags)) 6469 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6470 6471 self._advance() 6472 6473 self.raise_error(f"No closing {''.join(tags)} found") 6474 return None 6475 6476 def _find_parser( 6477 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6478 ) -> t.Optional[t.Callable]: 6479 if not self._curr: 6480 return None 6481 6482 index = self._index 6483 this = [] 6484 while True: 6485 # The current token might be multiple words 6486 curr = self._curr.text.upper() 6487 key = curr.split(" ") 6488 this.append(curr) 6489 6490 self._advance() 6491 result, trie = in_trie(trie, key) 6492 if result == TrieResult.FAILED: 6493 break 6494 6495 if result == TrieResult.EXISTS: 6496 subparser = parsers[" ".join(this)] 6497 return subparser 6498 6499 self._retreat(index) 6500 return None 6501 6502 def _match(self, token_type, advance=True, expression=None): 6503 if not self._curr: 6504 return None 6505 6506 if self._curr.token_type == token_type: 6507 if advance: 6508 self._advance() 6509 self._add_comments(expression) 6510 return True 6511 6512 return None 6513 6514 def _match_set(self, types, advance=True): 6515 if not self._curr: 6516 return None 6517 6518 if self._curr.token_type in types: 6519 if advance: 6520 self._advance() 6521 return True 6522 6523 return None 6524 6525 def _match_pair(self, token_type_a, token_type_b, advance=True): 6526 if not self._curr or not self._next: 6527 return None 6528 6529 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6530 if advance: 6531 self._advance(2) 6532 return True 6533 6534 return None 6535 6536 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6537 if not self._match(TokenType.L_PAREN, expression=expression): 6538 self.raise_error("Expecting (") 6539 6540 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6541 if not self._match(TokenType.R_PAREN, expression=expression): 6542 self.raise_error("Expecting )") 6543 6544 def _match_texts(self, texts, advance=True): 6545 if self._curr and self._curr.text.upper() in texts: 6546 if advance: 6547 self._advance() 6548 return True 6549 return None 6550 6551 def _match_text_seq(self, *texts, advance=True): 6552 index = self._index 6553 for text in texts: 6554 if self._curr and self._curr.text.upper() == text: 6555 self._advance() 6556 else: 6557 
self._retreat(index) 6558 return None 6559 6560 if not advance: 6561 self._retreat(index) 6562 6563 return True 6564 6565 def _replace_lambda( 6566 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6567 ) -> t.Optional[exp.Expression]: 6568 if not node: 6569 return node 6570 6571 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6572 6573 for column in node.find_all(exp.Column): 6574 typ = lambda_types.get(column.parts[0].name) 6575 if typ is not None: 6576 dot_or_id = column.to_dot() if column.table else column.this 6577 6578 if typ: 6579 dot_or_id = self.expression( 6580 exp.Cast, 6581 this=dot_or_id, 6582 to=typ, 6583 ) 6584 6585 parent = column.parent 6586 6587 while isinstance(parent, exp.Dot): 6588 if not isinstance(parent.parent, exp.Dot): 6589 parent.replace(dot_or_id) 6590 break 6591 parent = parent.parent 6592 else: 6593 if column is node: 6594 node = dot_or_id 6595 else: 6596 column.replace(dot_or_id) 6597 return node 6598 6599 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6600 start = self._prev 6601 6602 # Not to be confused with TRUNCATE(number, decimals) function call 6603 if self._match(TokenType.L_PAREN): 6604 self._retreat(self._index - 2) 6605 return self._parse_function() 6606 6607 # Clickhouse supports TRUNCATE DATABASE as well 6608 is_database = self._match(TokenType.DATABASE) 6609 6610 self._match(TokenType.TABLE) 6611 6612 exists = self._parse_exists(not_=False) 6613 6614 expressions = self._parse_csv( 6615 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6616 ) 6617 6618 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6619 6620 if self._match_text_seq("RESTART", "IDENTITY"): 6621 identity = "RESTART" 6622 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6623 identity = "CONTINUE" 6624 else: 6625 identity = None 6626 6627 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6628 option = self._prev.text 6629 else: 6630 option = None 6631 6632 partition = self._parse_partition() 6633 6634 # Fallback case 6635 if self._curr: 6636 return self._parse_as_command(start) 6637 6638 return self.expression( 6639 exp.TruncateTable, 6640 expressions=expressions, 6641 is_database=is_database, 6642 exists=exists, 6643 cluster=cluster, 6644 identity=identity, 6645 option=option, 6646 partition=partition, 6647 ) 6648 6649 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6650 this = self._parse_ordered(self._parse_opclass) 6651 6652 if not self._match(TokenType.WITH): 6653 return this 6654 6655 op = self._parse_var(any_token=True) 6656 6657 return self.expression(exp.WithOperator, this=this, op=op) 6658 6659 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6660 self._match(TokenType.EQ) 6661 self._match(TokenType.L_PAREN) 6662 6663 opts: t.List[t.Optional[exp.Expression]] = [] 6664 while self._curr and not self._match(TokenType.R_PAREN): 6665 if self._match_text_seq("FORMAT_NAME", "="): 6666 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6667 # so we parse it separately to use _parse_field() 6668 prop = self.expression( 6669 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6670 ) 6671 opts.append(prop) 6672 else: 6673 opts.append(self._parse_property()) 6674 6675 self._match(TokenType.COMMA) 6676 6677 return opts 6678 6679 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6680 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6681 6682 options = [] 6683 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6684 option = self._parse_var(any_token=True) 6685 prev = self._prev.text.upper() 6686 6687 # Different dialects might separate options and values by white space, "=" and "AS" 6688 self._match(TokenType.EQ) 6689 self._match(TokenType.ALIAS) 6690 6691 param = self.expression(exp.CopyParameter, this=option) 6692 6693 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6694 TokenType.L_PAREN, advance=False 6695 ): 6696 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6697 param.set("expressions", self._parse_wrapped_options()) 6698 elif prev == "FILE_FORMAT": 6699 # T-SQL's external file format case 6700 param.set("expression", self._parse_field()) 6701 else: 6702 param.set("expression", self._parse_unquoted_field()) 6703 6704 options.append(param) 6705 self._match(sep) 6706 6707 return options 6708 6709 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6710 expr = self.expression(exp.Credentials) 6711 6712 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6713 expr.set("storage", self._parse_field()) 6714 if self._match_text_seq("CREDENTIALS"): 6715 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6716 creds = ( 6717 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6718 ) 6719 expr.set("credentials", creds) 6720 if self._match_text_seq("ENCRYPTION"): 6721 expr.set("encryption", self._parse_wrapped_options()) 6722 if self._match_text_seq("IAM_ROLE"): 6723 expr.set("iam_role", self._parse_field()) 6724 if self._match_text_seq("REGION"): 6725 expr.set("region", self._parse_field()) 6726 6727 return expr 6728 6729 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6730 return self._parse_field() 6731 6732 def _parse_copy(self) -> exp.Copy | exp.Command: 6733 start = self._prev 6734 6735 self._match(TokenType.INTO) 6736 6737 this = ( 6738 self._parse_select(nested=True, parse_subquery_alias=False) 6739 if self._match(TokenType.L_PAREN, advance=False) 6740 else self._parse_table(schema=True) 6741 ) 6742 6743 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6744 6745 files = self._parse_csv(self._parse_file_location) 6746 credentials = self._parse_credentials() 6747 6748 self._match_text_seq("WITH") 6749 6750 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6751 6752 # Fallback case 6753 if self._curr: 6754 return self._parse_as_command(start) 6755 6756 return self.expression( 6757 exp.Copy, 6758 this=this, 6759 kind=kind, 6760 credentials=credentials, 6761 files=files, 6762 params=params, 6763 )
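
The methods above are internal; in normal use they are reached through sqlglot's public entry points. A minimal usage sketch follows (not part of the module source, and the exact rendered SQL may vary between sqlglot versions), showing how a few of the constructs parsed in this section surface in the AST:

import sqlglot
from sqlglot import exp

# _parse_decode: the variadic DECODE(expr, search, result, ..., default) form
# is expanded into an exp.Case, so it transpiles to explicit WHEN branches.
decoded = sqlglot.parse_one("SELECT DECODE(a, 1, 'one', NULL, 'missing', 'other') FROM t")
print(decoded.sql(dialect="duckdb"))
# e.g. SELECT CASE WHEN a = 1 THEN 'one' WHEN a IS NULL THEN 'missing' ELSE 'other' END FROM t

# _parse_window / _parse_respect_or_ignore_nulls: IGNORE NULLS written after
# the function call is folded into an exp.IgnoreNulls node inside the window.
win = sqlglot.parse_one(
    "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (PARTITION BY g ORDER BY y) FROM t"
)
assert win.find(exp.Window) is not None
assert win.find(exp.IgnoreNulls) is not None

# _parse_merge / _parse_when_matched: each WHEN branch becomes an exp.When
# node under the exp.Merge node's "expressions".
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
print(len(list(merge.find_all(exp.When))))  # 2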
391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION = { 509 TokenType.AND: exp.And, 510 TokenType.OR: exp.Or, 511 } 512 513 EQUALITY = { 514 TokenType.EQ: exp.EQ, 515 TokenType.NEQ: exp.NEQ, 516 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 517 } 518 519 COMPARISON = { 520 TokenType.GT: exp.GT, 521 TokenType.GTE: exp.GTE, 522 TokenType.LT: exp.LT, 523 TokenType.LTE: exp.LTE, 524 } 525 526 BITWISE = { 527 TokenType.AMP: exp.BitwiseAnd, 528 TokenType.CARET: exp.BitwiseXor, 529 TokenType.PIPE: exp.BitwiseOr, 530 } 531 532 TERM = { 533 TokenType.DASH: exp.Sub, 534 TokenType.PLUS: exp.Add, 535 TokenType.MOD: exp.Mod, 536 TokenType.COLLATE: exp.Collate, 537 } 538 539 FACTOR = { 540 TokenType.DIV: exp.IntDiv, 541 TokenType.LR_ARROW: exp.Distance, 542 TokenType.SLASH: exp.Div, 543 TokenType.STAR: exp.Mul, 544 } 545 546 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 547 548 TIMES = { 549 TokenType.TIME, 550 TokenType.TIMETZ, 551 } 552 553 TIMESTAMPS = { 554 TokenType.TIMESTAMP, 555 
TokenType.TIMESTAMPTZ, 556 TokenType.TIMESTAMPLTZ, 557 *TIMES, 558 } 559 560 SET_OPERATIONS = { 561 TokenType.UNION, 562 TokenType.INTERSECT, 563 TokenType.EXCEPT, 564 } 565 566 JOIN_METHODS = { 567 TokenType.ASOF, 568 TokenType.NATURAL, 569 TokenType.POSITIONAL, 570 } 571 572 JOIN_SIDES = { 573 TokenType.LEFT, 574 TokenType.RIGHT, 575 TokenType.FULL, 576 } 577 578 JOIN_KINDS = { 579 TokenType.INNER, 580 TokenType.OUTER, 581 TokenType.CROSS, 582 TokenType.SEMI, 583 TokenType.ANTI, 584 } 585 586 JOIN_HINTS: t.Set[str] = set() 587 588 LAMBDAS = { 589 TokenType.ARROW: lambda self, expressions: self.expression( 590 exp.Lambda, 591 this=self._replace_lambda( 592 self._parse_conjunction(), 593 expressions, 594 ), 595 expressions=expressions, 596 ), 597 TokenType.FARROW: lambda self, expressions: self.expression( 598 exp.Kwarg, 599 this=exp.var(expressions[0].name), 600 expression=self._parse_conjunction(), 601 ), 602 } 603 604 COLUMN_OPERATORS = { 605 TokenType.DOT: None, 606 TokenType.DCOLON: lambda self, this, to: self.expression( 607 exp.Cast if self.STRICT_CAST else exp.TryCast, 608 this=this, 609 to=to, 610 ), 611 TokenType.ARROW: lambda self, this, path: self.expression( 612 exp.JSONExtract, 613 this=this, 614 expression=self.dialect.to_json_path(path), 615 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 616 ), 617 TokenType.DARROW: lambda self, this, path: self.expression( 618 exp.JSONExtractScalar, 619 this=this, 620 expression=self.dialect.to_json_path(path), 621 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 622 ), 623 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 624 exp.JSONBExtract, 625 this=this, 626 expression=path, 627 ), 628 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 629 exp.JSONBExtractScalar, 630 this=this, 631 expression=path, 632 ), 633 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 634 exp.JSONBContains, 635 this=this, 636 expression=key, 637 ), 638 } 639 640 EXPRESSION_PARSERS = { 641 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 642 exp.Column: lambda self: self._parse_column(), 643 exp.Condition: lambda self: self._parse_conjunction(), 644 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 645 exp.Expression: lambda self: self._parse_expression(), 646 exp.From: lambda self: self._parse_from(joins=True), 647 exp.Group: lambda self: self._parse_group(), 648 exp.Having: lambda self: self._parse_having(), 649 exp.Identifier: lambda self: self._parse_id_var(), 650 exp.Join: lambda self: self._parse_join(), 651 exp.Lambda: lambda self: self._parse_lambda(), 652 exp.Lateral: lambda self: self._parse_lateral(), 653 exp.Limit: lambda self: self._parse_limit(), 654 exp.Offset: lambda self: self._parse_offset(), 655 exp.Order: lambda self: self._parse_order(), 656 exp.Ordered: lambda self: self._parse_ordered(), 657 exp.Properties: lambda self: self._parse_properties(), 658 exp.Qualify: lambda self: self._parse_qualify(), 659 exp.Returning: lambda self: self._parse_returning(), 660 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 661 exp.Table: lambda self: self._parse_table_parts(), 662 exp.TableAlias: lambda self: self._parse_table_alias(), 663 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 664 exp.Where: lambda self: self._parse_where(), 665 exp.Window: lambda self: self._parse_named_window(), 666 exp.With: lambda self: self._parse_with(), 667 "JOIN_TYPE": lambda self: self._parse_join_parts(), 668 } 669 670 STATEMENT_PARSERS 
= { 671 TokenType.ALTER: lambda self: self._parse_alter(), 672 TokenType.BEGIN: lambda self: self._parse_transaction(), 673 TokenType.CACHE: lambda self: self._parse_cache(), 674 TokenType.COMMENT: lambda self: self._parse_comment(), 675 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 676 TokenType.COPY: lambda self: self._parse_copy(), 677 TokenType.CREATE: lambda self: self._parse_create(), 678 TokenType.DELETE: lambda self: self._parse_delete(), 679 TokenType.DESC: lambda self: self._parse_describe(), 680 TokenType.DESCRIBE: lambda self: self._parse_describe(), 681 TokenType.DROP: lambda self: self._parse_drop(), 682 TokenType.INSERT: lambda self: self._parse_insert(), 683 TokenType.KILL: lambda self: self._parse_kill(), 684 TokenType.LOAD: lambda self: self._parse_load(), 685 TokenType.MERGE: lambda self: self._parse_merge(), 686 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 687 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 688 TokenType.REFRESH: lambda self: self._parse_refresh(), 689 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 690 TokenType.SET: lambda self: self._parse_set(), 691 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 692 TokenType.UNCACHE: lambda self: self._parse_uncache(), 693 TokenType.UPDATE: lambda self: self._parse_update(), 694 TokenType.USE: lambda self: self.expression( 695 exp.Use, 696 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 697 this=self._parse_table(schema=False), 698 ), 699 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 700 } 701 702 UNARY_PARSERS = { 703 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 704 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 705 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 706 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 707 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 708 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 709 } 710 711 STRING_PARSERS = { 712 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 713 exp.RawString, this=token.text 714 ), 715 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 716 exp.National, this=token.text 717 ), 718 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 719 TokenType.STRING: lambda self, token: self.expression( 720 exp.Literal, this=token.text, is_string=True 721 ), 722 TokenType.UNICODE_STRING: lambda self, token: self.expression( 723 exp.UnicodeString, 724 this=token.text, 725 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 726 ), 727 } 728 729 NUMERIC_PARSERS = { 730 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 731 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 732 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 733 TokenType.NUMBER: lambda self, token: self.expression( 734 exp.Literal, this=token.text, is_string=False 735 ), 736 } 737 738 PRIMARY_PARSERS = { 739 **STRING_PARSERS, 740 **NUMERIC_PARSERS, 741 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 742 TokenType.NULL: lambda self, _: self.expression(exp.Null), 743 TokenType.TRUE: lambda self, _: 
self.expression(exp.Boolean, this=True), 744 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 745 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 746 TokenType.STAR: lambda self, _: self.expression( 747 exp.Star, 748 **{ 749 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 750 "replace": self._parse_star_op("REPLACE"), 751 "rename": self._parse_star_op("RENAME"), 752 }, 753 ), 754 } 755 756 PLACEHOLDER_PARSERS = { 757 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 758 TokenType.PARAMETER: lambda self: self._parse_parameter(), 759 TokenType.COLON: lambda self: ( 760 self.expression(exp.Placeholder, this=self._prev.text) 761 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 762 else None 763 ), 764 } 765 766 RANGE_PARSERS = { 767 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 768 TokenType.GLOB: binary_range_parser(exp.Glob), 769 TokenType.ILIKE: binary_range_parser(exp.ILike), 770 TokenType.IN: lambda self, this: self._parse_in(this), 771 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 772 TokenType.IS: lambda self, this: self._parse_is(this), 773 TokenType.LIKE: binary_range_parser(exp.Like), 774 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 775 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 776 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 777 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 778 } 779 780 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 781 "ALLOWED_VALUES": lambda self: self.expression( 782 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 783 ), 784 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 785 "AUTO": lambda self: self._parse_auto_property(), 786 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 787 "BACKUP": lambda self: self.expression( 788 exp.BackupProperty, this=self._parse_var(any_token=True) 789 ), 790 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 791 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 792 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 793 "CHECKSUM": lambda self: self._parse_checksum(), 794 "CLUSTER BY": lambda self: self._parse_cluster(), 795 "CLUSTERED": lambda self: self._parse_clustered_by(), 796 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 797 exp.CollateProperty, **kwargs 798 ), 799 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 800 "CONTAINS": lambda self: self._parse_contains_property(), 801 "COPY": lambda self: self._parse_copy_property(), 802 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 803 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 804 "DEFINER": lambda self: self._parse_definer(), 805 "DETERMINISTIC": lambda self: self.expression( 806 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 807 ), 808 "DISTKEY": lambda self: self._parse_distkey(), 809 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 810 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 811 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 812 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 813 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 814 "FORMAT": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 815 "FREESPACE": lambda self: self._parse_freespace(), 816 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 817 "HEAP": lambda self: self.expression(exp.HeapProperty), 818 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 819 "IMMUTABLE": lambda self: self.expression( 820 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 821 ), 822 "INHERITS": lambda self: self.expression( 823 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 824 ), 825 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 826 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 827 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 828 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 829 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 830 "LIKE": lambda self: self._parse_create_like(), 831 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 832 "LOCK": lambda self: self._parse_locking(), 833 "LOCKING": lambda self: self._parse_locking(), 834 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 835 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 836 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 837 "MODIFIES": lambda self: self._parse_modifies_property(), 838 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 839 "NO": lambda self: self._parse_no_property(), 840 "ON": lambda self: self._parse_on_property(), 841 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 842 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 843 "PARTITION": lambda self: self._parse_partitioned_of(), 844 "PARTITION BY": lambda self: self._parse_partitioned_by(), 845 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 846 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 847 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 848 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 849 "READS": lambda self: self._parse_reads_property(), 850 "REMOTE": lambda self: self._parse_remote_with_connection(), 851 "RETURNS": lambda self: self._parse_returns(), 852 "STRICT": lambda self: self.expression(exp.StrictProperty), 853 "ROW": lambda self: self._parse_row(), 854 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 855 "SAMPLE": lambda self: self.expression( 856 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 857 ), 858 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 859 "SETTINGS": lambda self: self.expression( 860 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 861 ), 862 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 863 "SORTKEY": lambda self: self._parse_sortkey(), 864 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 865 "STABLE": lambda self: self.expression( 866 exp.StabilityProperty, this=exp.Literal.string("STABLE") 867 ), 868 "STORED": lambda self: self._parse_stored(), 869 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 870 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 871 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 872 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 873 "TO": lambda 
self: self._parse_to_table(), 874 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 875 "TRANSFORM": lambda self: self.expression( 876 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 877 ), 878 "TTL": lambda self: self._parse_ttl(), 879 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 880 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 881 "VOLATILE": lambda self: self._parse_volatile_property(), 882 "WITH": lambda self: self._parse_with_property(), 883 } 884 885 CONSTRAINT_PARSERS = { 886 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 887 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 888 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 889 "CHARACTER SET": lambda self: self.expression( 890 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 891 ), 892 "CHECK": lambda self: self.expression( 893 exp.CheckColumnConstraint, 894 this=self._parse_wrapped(self._parse_conjunction), 895 enforced=self._match_text_seq("ENFORCED"), 896 ), 897 "COLLATE": lambda self: self.expression( 898 exp.CollateColumnConstraint, this=self._parse_var() 899 ), 900 "COMMENT": lambda self: self.expression( 901 exp.CommentColumnConstraint, this=self._parse_string() 902 ), 903 "COMPRESS": lambda self: self._parse_compress(), 904 "CLUSTERED": lambda self: self.expression( 905 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 906 ), 907 "NONCLUSTERED": lambda self: self.expression( 908 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 909 ), 910 "DEFAULT": lambda self: self.expression( 911 exp.DefaultColumnConstraint, this=self._parse_bitwise() 912 ), 913 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 914 "EPHEMERAL": lambda self: self.expression( 915 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 916 ), 917 "EXCLUDE": lambda self: self.expression( 918 exp.ExcludeColumnConstraint, this=self._parse_index_params() 919 ), 920 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 921 "FORMAT": lambda self: self.expression( 922 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 923 ), 924 "GENERATED": lambda self: self._parse_generated_as_identity(), 925 "IDENTITY": lambda self: self._parse_auto_increment(), 926 "INLINE": lambda self: self._parse_inline(), 927 "LIKE": lambda self: self._parse_create_like(), 928 "NOT": lambda self: self._parse_not_constraint(), 929 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 930 "ON": lambda self: ( 931 self._match(TokenType.UPDATE) 932 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 933 ) 934 or self.expression(exp.OnProperty, this=self._parse_id_var()), 935 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 936 "PERIOD": lambda self: self._parse_period_for_system_time(), 937 "PRIMARY KEY": lambda self: self._parse_primary_key(), 938 "REFERENCES": lambda self: self._parse_references(match=False), 939 "TITLE": lambda self: self.expression( 940 exp.TitleColumnConstraint, this=self._parse_var_or_string() 941 ), 942 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 943 "UNIQUE": lambda self: self._parse_unique(), 944 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 945 "WITH": lambda self: 
self.expression( 946 exp.Properties, expressions=self._parse_wrapped_properties() 947 ), 948 } 949 950 ALTER_PARSERS = { 951 "ADD": lambda self: self._parse_alter_table_add(), 952 "ALTER": lambda self: self._parse_alter_table_alter(), 953 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 954 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 955 "DROP": lambda self: self._parse_alter_table_drop(), 956 "RENAME": lambda self: self._parse_alter_table_rename(), 957 "SET": lambda self: self._parse_alter_table_set(), 958 } 959 960 ALTER_ALTER_PARSERS = { 961 "DISTKEY": lambda self: self._parse_alter_diststyle(), 962 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 963 "SORTKEY": lambda self: self._parse_alter_sortkey(), 964 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 965 } 966 967 SCHEMA_UNNAMED_CONSTRAINTS = { 968 "CHECK", 969 "EXCLUDE", 970 "FOREIGN KEY", 971 "LIKE", 972 "PERIOD", 973 "PRIMARY KEY", 974 "UNIQUE", 975 } 976 977 NO_PAREN_FUNCTION_PARSERS = { 978 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 979 "CASE": lambda self: self._parse_case(), 980 "IF": lambda self: self._parse_if(), 981 "NEXT": lambda self: self._parse_next_value_for(), 982 } 983 984 INVALID_FUNC_NAME_TOKENS = { 985 TokenType.IDENTIFIER, 986 TokenType.STRING, 987 } 988 989 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 990 991 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 992 993 FUNCTION_PARSERS = { 994 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 995 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 996 "DECODE": lambda self: self._parse_decode(), 997 "EXTRACT": lambda self: self._parse_extract(), 998 "JSON_OBJECT": lambda self: self._parse_json_object(), 999 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1000 "JSON_TABLE": lambda self: self._parse_json_table(), 1001 "MATCH": lambda self: self._parse_match_against(), 1002 "OPENJSON": lambda self: self._parse_open_json(), 1003 "POSITION": lambda self: self._parse_position(), 1004 "PREDICT": lambda self: self._parse_predict(), 1005 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1006 "STRING_AGG": lambda self: self._parse_string_agg(), 1007 "SUBSTRING": lambda self: self._parse_substring(), 1008 "TRIM": lambda self: self._parse_trim(), 1009 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1010 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1011 } 1012 1013 QUERY_MODIFIER_PARSERS = { 1014 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1015 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1016 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1017 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1018 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1019 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1020 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1021 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1022 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1023 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1024 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1025 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1026 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1027 TokenType.TABLE_SAMPLE: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)), 1028 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1029 TokenType.CLUSTER_BY: lambda self: ( 1030 "cluster", 1031 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1032 ), 1033 TokenType.DISTRIBUTE_BY: lambda self: ( 1034 "distribute", 1035 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1036 ), 1037 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1038 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1039 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1040 } 1041 1042 SET_PARSERS = { 1043 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1044 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1045 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1046 "TRANSACTION": lambda self: self._parse_set_transaction(), 1047 } 1048 1049 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1050 1051 TYPE_LITERAL_PARSERS = { 1052 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1053 } 1054 1055 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1056 1057 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1058 1059 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1060 1061 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1062 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1063 "ISOLATION": ( 1064 ("LEVEL", "REPEATABLE", "READ"), 1065 ("LEVEL", "READ", "COMMITTED"), 1066 ("LEVEL", "READ", "UNCOMMITTED"), 1067 ("LEVEL", "SERIALIZABLE"), 1068 ), 1069 "READ": ("WRITE", "ONLY"), 1070 } 1071 1072 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1073 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1074 ) 1075 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1076 1077 CREATE_SEQUENCE: OPTIONS_TYPE = { 1078 "SCALE": ("EXTEND", "NOEXTEND"), 1079 "SHARD": ("EXTEND", "NOEXTEND"), 1080 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1081 **dict.fromkeys( 1082 ( 1083 "SESSION", 1084 "GLOBAL", 1085 "KEEP", 1086 "NOKEEP", 1087 "ORDER", 1088 "NOORDER", 1089 "NOCACHE", 1090 "CYCLE", 1091 "NOCYCLE", 1092 "NOMINVALUE", 1093 "NOMAXVALUE", 1094 "NOSCALE", 1095 "NOSHARD", 1096 ), 1097 tuple(), 1098 ), 1099 } 1100 1101 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1102 1103 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1104 1105 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1106 1107 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1108 1109 CLONE_KEYWORDS = {"CLONE", "COPY"} 1110 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1111 1112 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1113 1114 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1115 1116 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1117 1118 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1119 1120 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1121 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1122 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1123 1124 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1125 1126 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1127 1128
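# NOTE: illustrative sketch, not part of the original source. Dialect-specific
# parsers customize the behavior defined by the tables and flags above by
# subclassing Parser and overriding them; the subclass and overrides below are
# hypothetical:
#
#     class MyDialectParser(Parser):
#         STRICT_CAST = False  # '::' then builds exp.TryCast instead of exp.Cast
#         RESERVED_TOKENS = Parser.RESERVED_TOKENS - {TokenType.SELECT}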
ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1129 1130 DISTINCT_TOKENS = {TokenType.DISTINCT} 1131 1132 NULL_TOKENS = {TokenType.NULL} 1133 1134 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1135 1136 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1137 1138 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1139 1140 STRICT_CAST = True 1141 1142 PREFIXED_PIVOT_COLUMNS = False 1143 IDENTIFY_PIVOT_STRINGS = False 1144 1145 LOG_DEFAULTS_TO_LN = False 1146 1147 # Whether ADD is present for each column added by ALTER TABLE 1148 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1149 1150 # Whether the table sample clause expects CSV syntax 1151 TABLESAMPLE_CSV = False 1152 1153 # The default method used for table sampling 1154 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1155 1156 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1157 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1158 1159 # Whether the TRIM function expects the characters to trim as its first argument 1160 TRIM_PATTERN_FIRST = False 1161 1162 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1163 STRING_ALIASES = False 1164 1165 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1166 MODIFIERS_ATTACHED_TO_UNION = True 1167 UNION_MODIFIERS = {"order", "limit", "offset"} 1168 1169 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1170 NO_PAREN_IF_COMMANDS = True 1171 1172 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1173 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1174 1175 # Whether the `:` operator is used to extract a value from a JSON document 1176 COLON_IS_JSON_EXTRACT = False 1177 1178 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1179 # If this is True and '(' is not found, the keyword will be treated as an identifier 1180 VALUES_FOLLOWED_BY_PAREN = True 1181 1182 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1183 SUPPORTS_IMPLICIT_UNNEST = False 1184 1185 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1186 INTERVAL_SPANS = True 1187 1188 # Whether a PARTITION clause can follow a table reference 1189 SUPPORTS_PARTITION_SELECTION = False 1190 1191 __slots__ = ( 1192 "error_level", 1193 "error_message_context", 1194 "max_errors", 1195 "dialect", 1196 "sql", 1197 "errors", 1198 "_tokens", 1199 "_index", 1200 "_curr", 1201 "_next", 1202 "_prev", 1203 "_prev_comments", 1204 ) 1205 1206 # Autofilled 1207 SHOW_TRIE: t.Dict = {} 1208 SET_TRIE: t.Dict = {} 1209 1210 def __init__( 1211 self, 1212 error_level: t.Optional[ErrorLevel] = None, 1213 error_message_context: int = 100, 1214 max_errors: int = 3, 1215 dialect: DialectType = None, 1216 ): 1217 from sqlglot.dialects import Dialect 1218 1219 self.error_level = error_level or ErrorLevel.IMMEDIATE 1220 self.error_message_context = error_message_context 1221 self.max_errors = max_errors 1222 self.dialect = Dialect.get_or_raise(dialect) 1223 self.reset() 1224 1225 def reset(self): 1226 self.sql = "" 1227 self.errors = [] 1228 self._tokens = [] 1229 self._index = 0 1230 self._curr = None 1231 self._next = None 1232 self._prev = None 1233 self._prev_comments = None 1234 1235 def parse( 1236 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1237 ) -> t.List[t.Optional[exp.Expression]]: 1238 """ 1239 Parses a list of tokens and returns a list of syntax trees, one tree 1240 per parsed SQL statement. 1241 1242 Args: 1243 raw_tokens: The list of tokens. 1244 sql: The original SQL string, used to produce helpful debug messages. 1245 1246 Returns: 1247 The list of the produced syntax trees. 1248 """ 1249 return self._parse( 1250 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1251 ) 1252 1253 def parse_into( 1254 self, 1255 expression_types: exp.IntoType, 1256 raw_tokens: t.List[Token], 1257 sql: t.Optional[str] = None, 1258 ) -> t.List[t.Optional[exp.Expression]]: 1259 """ 1260 Parses a list of tokens into a given Expression type. If a collection of Expression 1261 types is given instead, this method will try to parse the token list into each one 1262 of them, stopping at the first for which the parsing succeeds. 1263 1264 Args: 1265 expression_types: The expression type(s) to try and parse the token list into. 1266 raw_tokens: The list of tokens. 1267 sql: The original SQL string, used to produce helpful debug messages. 1268 1269 Returns: 1270 The target Expression. 
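Example:
    A minimal sketch (illustrative, not part of the original docstring); exp.Table
    is one of the expression types registered in EXPRESSION_PARSERS:

        tokens = Tokenizer().tokenize("db.tbl")
        table = Parser().parse_into(exp.Table, tokens, "db.tbl")[0]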
1271 """ 1272 errors = [] 1273 for expression_type in ensure_list(expression_types): 1274 parser = self.EXPRESSION_PARSERS.get(expression_type) 1275 if not parser: 1276 raise TypeError(f"No parser registered for {expression_type}") 1277 1278 try: 1279 return self._parse(parser, raw_tokens, sql) 1280 except ParseError as e: 1281 e.errors[0]["into_expression"] = expression_type 1282 errors.append(e) 1283 1284 raise ParseError( 1285 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1286 errors=merge_errors(errors), 1287 ) from errors[-1] 1288 1289 def _parse( 1290 self, 1291 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1292 raw_tokens: t.List[Token], 1293 sql: t.Optional[str] = None, 1294 ) -> t.List[t.Optional[exp.Expression]]: 1295 self.reset() 1296 self.sql = sql or "" 1297 1298 total = len(raw_tokens) 1299 chunks: t.List[t.List[Token]] = [[]] 1300 1301 for i, token in enumerate(raw_tokens): 1302 if token.token_type == TokenType.SEMICOLON: 1303 if token.comments: 1304 chunks.append([token]) 1305 1306 if i < total - 1: 1307 chunks.append([]) 1308 else: 1309 chunks[-1].append(token) 1310 1311 expressions = [] 1312 1313 for tokens in chunks: 1314 self._index = -1 1315 self._tokens = tokens 1316 self._advance() 1317 1318 expressions.append(parse_method(self)) 1319 1320 if self._index < len(self._tokens): 1321 self.raise_error("Invalid expression / Unexpected token") 1322 1323 self.check_errors() 1324 1325 return expressions 1326 1327 def check_errors(self) -> None: 1328 """Logs or raises any found errors, depending on the chosen error level setting.""" 1329 if self.error_level == ErrorLevel.WARN: 1330 for error in self.errors: 1331 logger.error(str(error)) 1332 elif self.error_level == ErrorLevel.RAISE and self.errors: 1333 raise ParseError( 1334 concat_messages(self.errors, self.max_errors), 1335 errors=merge_errors(self.errors), 1336 ) 1337 1338 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1339 """ 1340 Appends an error in the list of recorded errors or raises it, depending on the chosen 1341 error level setting. 1342 """ 1343 token = token or self._curr or self._prev or Token.string("") 1344 start = token.start 1345 end = token.end + 1 1346 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1347 highlight = self.sql[start:end] 1348 end_context = self.sql[end : end + self.error_message_context] 1349 1350 error = ParseError.new( 1351 f"{message}. Line {token.line}, Col: {token.col}.\n" 1352 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1353 description=message, 1354 line=token.line, 1355 col=token.col, 1356 start_context=start_context, 1357 highlight=highlight, 1358 end_context=end_context, 1359 ) 1360 1361 if self.error_level == ErrorLevel.IMMEDIATE: 1362 raise error 1363 1364 self.errors.append(error) 1365 1366 def expression( 1367 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1368 ) -> E: 1369 """ 1370 Creates a new, validated Expression. 1371 1372 Args: 1373 exp_class: The expression class to instantiate. 1374 comments: An optional list of comments to attach to the expression. 1375 kwargs: The arguments to set for the expression along with their respective values. 1376 1377 Returns: 1378 The target expression. 
1379 """ 1380 instance = exp_class(**kwargs) 1381 instance.add_comments(comments) if comments else self._add_comments(instance) 1382 return self.validate_expression(instance) 1383 1384 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1385 if expression and self._prev_comments: 1386 expression.add_comments(self._prev_comments) 1387 self._prev_comments = None 1388 1389 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1390 """ 1391 Validates an Expression, making sure that all its mandatory arguments are set. 1392 1393 Args: 1394 expression: The expression to validate. 1395 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1396 1397 Returns: 1398 The validated expression. 1399 """ 1400 if self.error_level != ErrorLevel.IGNORE: 1401 for error_message in expression.error_messages(args): 1402 self.raise_error(error_message) 1403 1404 return expression 1405 1406 def _find_sql(self, start: Token, end: Token) -> str: 1407 return self.sql[start.start : end.end + 1] 1408 1409 def _is_connected(self) -> bool: 1410 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1411 1412 def _advance(self, times: int = 1) -> None: 1413 self._index += times 1414 self._curr = seq_get(self._tokens, self._index) 1415 self._next = seq_get(self._tokens, self._index + 1) 1416 1417 if self._index > 0: 1418 self._prev = self._tokens[self._index - 1] 1419 self._prev_comments = self._prev.comments 1420 else: 1421 self._prev = None 1422 self._prev_comments = None 1423 1424 def _retreat(self, index: int) -> None: 1425 if index != self._index: 1426 self._advance(index - self._index) 1427 1428 def _warn_unsupported(self) -> None: 1429 if len(self._tokens) <= 1: 1430 return 1431 1432 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1433 # interested in emitting a warning for the one being currently processed. 1434 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1435 1436 logger.warning( 1437 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1438 ) 1439 1440 def _parse_command(self) -> exp.Command: 1441 self._warn_unsupported() 1442 return self.expression( 1443 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1444 ) 1445 1446 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1447 """ 1448 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1449 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1450 the parser state accordingly 1451 """ 1452 index = self._index 1453 error_level = self.error_level 1454 1455 self.error_level = ErrorLevel.IMMEDIATE 1456 try: 1457 this = parse_method() 1458 except ParseError: 1459 this = None 1460 finally: 1461 if not this or retreat: 1462 self._retreat(index) 1463 self.error_level = error_level 1464 1465 return this 1466 1467 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1468 start = self._prev 1469 exists = self._parse_exists() if allow_exists else None 1470 1471 self._match(TokenType.ON) 1472 1473 materialized = self._match_text_seq("MATERIALIZED") 1474 kind = self._match_set(self.CREATABLES) and self._prev 1475 if not kind: 1476 return self._parse_as_command(start) 1477 1478 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1479 this = self._parse_user_defined_function(kind=kind.token_type) 1480 elif kind.token_type == TokenType.TABLE: 1481 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1482 elif kind.token_type == TokenType.COLUMN: 1483 this = self._parse_column() 1484 else: 1485 this = self._parse_id_var() 1486 1487 self._match(TokenType.IS) 1488 1489 return self.expression( 1490 exp.Comment, 1491 this=this, 1492 kind=kind.text, 1493 expression=self._parse_string(), 1494 exists=exists, 1495 materialized=materialized, 1496 ) 1497 1498 def _parse_to_table( 1499 self, 1500 ) -> exp.ToTableProperty: 1501 table = self._parse_table_parts(schema=True) 1502 return self.expression(exp.ToTableProperty, this=table) 1503 1504 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1505 def _parse_ttl(self) -> exp.Expression: 1506 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1507 this = self._parse_bitwise() 1508 1509 if self._match_text_seq("DELETE"): 1510 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1511 if self._match_text_seq("RECOMPRESS"): 1512 return self.expression( 1513 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1514 ) 1515 if self._match_text_seq("TO", "DISK"): 1516 return self.expression( 1517 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1518 ) 1519 if self._match_text_seq("TO", "VOLUME"): 1520 return self.expression( 1521 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1522 ) 1523 1524 return this 1525 1526 expressions = self._parse_csv(_parse_ttl_action) 1527 where = self._parse_where() 1528 group = self._parse_group() 1529 1530 aggregates = None 1531 if group and self._match(TokenType.SET): 1532 aggregates = self._parse_csv(self._parse_set_item) 1533 1534 return self.expression( 1535 exp.MergeTreeTTL, 1536 expressions=expressions, 1537 where=where, 1538 group=group, 1539 aggregates=aggregates, 1540 ) 1541 1542 def _parse_statement(self) -> t.Optional[exp.Expression]: 1543 if self._curr is None: 1544 return None 1545 1546 if self._match_set(self.STATEMENT_PARSERS): 1547 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1548 1549 if self._match_set(self.dialect.tokenizer.COMMANDS): 1550 return self._parse_command() 1551 1552 expression = self._parse_expression() 1553 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1554 return self._parse_query_modifiers(expression) 1555 1556 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1557 start = 
self._prev 1558 temporary = self._match(TokenType.TEMPORARY) 1559 materialized = self._match_text_seq("MATERIALIZED") 1560 1561 kind = self._match_set(self.CREATABLES) and self._prev.text 1562 if not kind: 1563 return self._parse_as_command(start) 1564 1565 if_exists = exists or self._parse_exists() 1566 table = self._parse_table_parts( 1567 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1568 ) 1569 1570 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1571 1572 if self._match(TokenType.L_PAREN, advance=False): 1573 expressions = self._parse_wrapped_csv(self._parse_types) 1574 else: 1575 expressions = None 1576 1577 return self.expression( 1578 exp.Drop, 1579 comments=start.comments, 1580 exists=if_exists, 1581 this=table, 1582 expressions=expressions, 1583 kind=kind.upper(), 1584 temporary=temporary, 1585 materialized=materialized, 1586 cascade=self._match_text_seq("CASCADE"), 1587 constraints=self._match_text_seq("CONSTRAINTS"), 1588 purge=self._match_text_seq("PURGE"), 1589 cluster=cluster, 1590 ) 1591 1592 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1593 return ( 1594 self._match_text_seq("IF") 1595 and (not not_ or self._match(TokenType.NOT)) 1596 and self._match(TokenType.EXISTS) 1597 ) 1598 1599 def _parse_create(self) -> exp.Create | exp.Command: 1600 # Note: this can't be None because we've matched a statement parser 1601 start = self._prev 1602 comments = self._prev_comments 1603 1604 replace = ( 1605 start.token_type == TokenType.REPLACE 1606 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1607 or self._match_pair(TokenType.OR, TokenType.ALTER) 1608 ) 1609 1610 unique = self._match(TokenType.UNIQUE) 1611 1612 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1613 self._advance() 1614 1615 properties = None 1616 create_token = self._match_set(self.CREATABLES) and self._prev 1617 1618 if not create_token: 1619 # exp.Properties.Location.POST_CREATE 1620 properties = self._parse_properties() 1621 create_token = self._match_set(self.CREATABLES) and self._prev 1622 1623 if not properties or not create_token: 1624 return self._parse_as_command(start) 1625 1626 exists = self._parse_exists(not_=True) 1627 this = None 1628 expression: t.Optional[exp.Expression] = None 1629 indexes = None 1630 no_schema_binding = None 1631 begin = None 1632 end = None 1633 clone = None 1634 1635 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1636 nonlocal properties 1637 if properties and temp_props: 1638 properties.expressions.extend(temp_props.expressions) 1639 elif temp_props: 1640 properties = temp_props 1641 1642 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1643 this = self._parse_user_defined_function(kind=create_token.token_type) 1644 1645 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1646 extend_props(self._parse_properties()) 1647 1648 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1649 extend_props(self._parse_properties()) 1650 1651 if not expression: 1652 if self._match(TokenType.COMMAND): 1653 expression = self._parse_as_command(self._prev) 1654 else: 1655 begin = self._match(TokenType.BEGIN) 1656 return_ = self._match_text_seq("RETURN") 1657 1658 if self._match(TokenType.STRING, advance=False): 1659 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1660 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1661 expression = self._parse_string() 1662 extend_props(self._parse_properties()) 1663 else: 1664 expression = self._parse_statement() 1665 1666 end = self._match_text_seq("END") 1667 1668 if return_: 1669 expression = self.expression(exp.Return, this=expression) 1670 elif create_token.token_type == TokenType.INDEX: 1671 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1672 if not self._match(TokenType.ON): 1673 index = self._parse_id_var() 1674 anonymous = False 1675 else: 1676 index = None 1677 anonymous = True 1678 1679 this = self._parse_index(index=index, anonymous=anonymous) 1680 elif create_token.token_type in self.DB_CREATABLES: 1681 table_parts = self._parse_table_parts( 1682 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1683 ) 1684 1685 # exp.Properties.Location.POST_NAME 1686 self._match(TokenType.COMMA) 1687 extend_props(self._parse_properties(before=True)) 1688 1689 this = self._parse_schema(this=table_parts) 1690 1691 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1692 extend_props(self._parse_properties()) 1693 1694 self._match(TokenType.ALIAS) 1695 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1696 # exp.Properties.Location.POST_ALIAS 1697 extend_props(self._parse_properties()) 1698 1699 if create_token.token_type == TokenType.SEQUENCE: 1700 expression = self._parse_types() 1701 extend_props(self._parse_properties()) 1702 else: 1703 expression = self._parse_ddl_select() 1704 1705 if create_token.token_type == TokenType.TABLE: 1706 # exp.Properties.Location.POST_EXPRESSION 1707 extend_props(self._parse_properties()) 1708 1709 indexes = [] 1710 while True: 1711 index = self._parse_index() 1712 1713 # exp.Properties.Location.POST_INDEX 1714 extend_props(self._parse_properties()) 1715 1716 if not index: 1717 break 1718 else: 1719 self._match(TokenType.COMMA) 1720 indexes.append(index) 1721 elif create_token.token_type == TokenType.VIEW: 1722 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1723 no_schema_binding = True 1724 1725 shallow = self._match_text_seq("SHALLOW") 1726 1727 if self._match_texts(self.CLONE_KEYWORDS): 1728 copy = self._prev.text.lower() == "copy" 1729 clone = self.expression( 1730 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1731 ) 1732 1733 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1734 return self._parse_as_command(start) 1735 1736 return self.expression( 1737 exp.Create, 1738 comments=comments, 1739 this=this, 1740 kind=create_token.text.upper(), 1741 replace=replace, 1742 unique=unique, 1743 expression=expression, 1744 exists=exists, 1745 properties=properties, 1746 indexes=indexes, 1747 no_schema_binding=no_schema_binding, 1748 begin=begin, 1749 end=end, 1750 clone=clone, 1751 ) 1752 1753 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1754 seq = exp.SequenceProperties() 1755 1756 options = [] 1757 index = self._index 1758 1759 while self._curr: 1760 self._match(TokenType.COMMA) 1761 if self._match_text_seq("INCREMENT"): 1762 self._match_text_seq("BY") 1763 self._match_text_seq("=") 1764 seq.set("increment", self._parse_term()) 1765 elif self._match_text_seq("MINVALUE"): 1766 seq.set("minvalue", self._parse_term()) 1767 elif self._match_text_seq("MAXVALUE"): 1768 seq.set("maxvalue", self._parse_term()) 1769 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1770 self._match_text_seq("=") 1771 seq.set("start", self._parse_term()) 1772 elif self._match_text_seq("CACHE"): 1773 # T-SQL allows empty CACHE which is initialized dynamically 1774 seq.set("cache", self._parse_number() or True) 1775 elif self._match_text_seq("OWNED", "BY"): 1776 # "OWNED BY NONE" is the default 1777 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1778 else: 1779 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1780 if opt: 1781 options.append(opt) 1782 else: 1783 break 1784 1785 seq.set("options", options if options else None) 1786 return None if self._index == index else seq 1787 1788 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1789 # only used for teradata currently 1790 self._match(TokenType.COMMA) 1791 1792 kwargs = { 1793 "no": self._match_text_seq("NO"), 1794 "dual": self._match_text_seq("DUAL"), 1795 "before": self._match_text_seq("BEFORE"), 1796 "default": self._match_text_seq("DEFAULT"), 1797 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1798 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1799 "after": self._match_text_seq("AFTER"), 1800 "minimum": self._match_texts(("MIN", "MINIMUM")), 1801 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1802 } 1803 1804 if self._match_texts(self.PROPERTY_PARSERS): 1805 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1806 try: 1807 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1808 except TypeError: 1809 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1810 1811 return None 1812 1813 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1814 return self._parse_wrapped_csv(self._parse_property) 1815 1816 def _parse_property(self) -> t.Optional[exp.Expression]: 1817 if self._match_texts(self.PROPERTY_PARSERS): 1818 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1819 1820 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1821 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1822 1823 if self._match_text_seq("COMPOUND", "SORTKEY"): 1824 return self._parse_sortkey(compound=True) 1825 1826 if self._match_text_seq("SQL", "SECURITY"): 1827 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1828 1829 index = self._index 1830 key = self._parse_column() 1831 1832 if not self._match(TokenType.EQ): 1833 self._retreat(index) 1834 return self._parse_sequence_properties() 1835 1836 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1837 if isinstance(key, exp.Column): 1838 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1839 1840 value = self._parse_bitwise() or self._parse_var(any_token=True) 1841 1842 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1843 if isinstance(value, exp.Column): 1844 value = exp.var(value.name) 1845 1846 return self.expression(exp.Property, this=key, value=value) 1847 1848 def _parse_stored(self) -> exp.FileFormatProperty: 1849 self._match(TokenType.ALIAS) 1850 1851 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1852 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1853 1854 return self.expression( 1855 exp.FileFormatProperty, 1856 this=( 1857 self.expression( 1858 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1859 ) 1860 if 
input_format or output_format 1861 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1862 ), 1863 ) 1864 1865 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1866 field = self._parse_field() 1867 if isinstance(field, exp.Identifier) and not field.quoted: 1868 field = exp.var(field) 1869 1870 return field 1871 1872 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1873 self._match(TokenType.EQ) 1874 self._match(TokenType.ALIAS) 1875 1876 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1877 1878 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1879 properties = [] 1880 while True: 1881 if before: 1882 prop = self._parse_property_before() 1883 else: 1884 prop = self._parse_property() 1885 if not prop: 1886 break 1887 for p in ensure_list(prop): 1888 properties.append(p) 1889 1890 if properties: 1891 return self.expression(exp.Properties, expressions=properties) 1892 1893 return None 1894 1895 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1896 return self.expression( 1897 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1898 ) 1899 1900 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1901 if self._index >= 2: 1902 pre_volatile_token = self._tokens[self._index - 2] 1903 else: 1904 pre_volatile_token = None 1905 1906 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1907 return exp.VolatileProperty() 1908 1909 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1910 1911 def _parse_retention_period(self) -> exp.Var: 1912 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1913 number = self._parse_number() 1914 number_str = f"{number} " if number else "" 1915 unit = self._parse_var(any_token=True) 1916 return exp.var(f"{number_str}{unit}") 1917 1918 def _parse_system_versioning_property( 1919 self, with_: bool = False 1920 ) -> exp.WithSystemVersioningProperty: 1921 self._match(TokenType.EQ) 1922 prop = self.expression( 1923 exp.WithSystemVersioningProperty, 1924 **{ # type: ignore 1925 "on": True, 1926 "with": with_, 1927 }, 1928 ) 1929 1930 if self._match_text_seq("OFF"): 1931 prop.set("on", False) 1932 return prop 1933 1934 self._match(TokenType.ON) 1935 if self._match(TokenType.L_PAREN): 1936 while self._curr and not self._match(TokenType.R_PAREN): 1937 if self._match_text_seq("HISTORY_TABLE", "="): 1938 prop.set("this", self._parse_table_parts()) 1939 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1940 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1941 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1942 prop.set("retention_period", self._parse_retention_period()) 1943 1944 self._match(TokenType.COMMA) 1945 1946 return prop 1947 1948 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1949 self._match(TokenType.EQ) 1950 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1951 prop = self.expression(exp.DataDeletionProperty, on=on) 1952 1953 if self._match(TokenType.L_PAREN): 1954 while self._curr and not self._match(TokenType.R_PAREN): 1955 if self._match_text_seq("FILTER_COLUMN", "="): 1956 prop.set("filter_column", self._parse_column()) 1957 elif self._match_text_seq("RETENTION_PERIOD", "="): 1958 prop.set("retention_period", self._parse_retention_period()) 1959 1960 
self._match(TokenType.COMMA) 1961 1962 return prop 1963 1964 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1965 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1966 prop = self._parse_system_versioning_property(with_=True) 1967 self._match_r_paren() 1968 return prop 1969 1970 if self._match(TokenType.L_PAREN, advance=False): 1971 return self._parse_wrapped_properties() 1972 1973 if self._match_text_seq("JOURNAL"): 1974 return self._parse_withjournaltable() 1975 1976 if self._match_texts(self.VIEW_ATTRIBUTES): 1977 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1978 1979 if self._match_text_seq("DATA"): 1980 return self._parse_withdata(no=False) 1981 elif self._match_text_seq("NO", "DATA"): 1982 return self._parse_withdata(no=True) 1983 1984 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1985 return self._parse_serde_properties(with_=True) 1986 1987 if not self._next: 1988 return None 1989 1990 return self._parse_withisolatedloading() 1991 1992 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1993 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1994 self._match(TokenType.EQ) 1995 1996 user = self._parse_id_var() 1997 self._match(TokenType.PARAMETER) 1998 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1999 2000 if not user or not host: 2001 return None 2002 2003 return exp.DefinerProperty(this=f"{user}@{host}") 2004 2005 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2006 self._match(TokenType.TABLE) 2007 self._match(TokenType.EQ) 2008 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2009 2010 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2011 return self.expression(exp.LogProperty, no=no) 2012 2013 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2014 return self.expression(exp.JournalProperty, **kwargs) 2015 2016 def _parse_checksum(self) -> exp.ChecksumProperty: 2017 self._match(TokenType.EQ) 2018 2019 on = None 2020 if self._match(TokenType.ON): 2021 on = True 2022 elif self._match_text_seq("OFF"): 2023 on = False 2024 2025 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2026 2027 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2028 return self.expression( 2029 exp.Cluster, 2030 expressions=( 2031 self._parse_wrapped_csv(self._parse_ordered) 2032 if wrapped 2033 else self._parse_csv(self._parse_ordered) 2034 ), 2035 ) 2036 2037 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2038 self._match_text_seq("BY") 2039 2040 self._match_l_paren() 2041 expressions = self._parse_csv(self._parse_column) 2042 self._match_r_paren() 2043 2044 if self._match_text_seq("SORTED", "BY"): 2045 self._match_l_paren() 2046 sorted_by = self._parse_csv(self._parse_ordered) 2047 self._match_r_paren() 2048 else: 2049 sorted_by = None 2050 2051 self._match(TokenType.INTO) 2052 buckets = self._parse_number() 2053 self._match_text_seq("BUCKETS") 2054 2055 return self.expression( 2056 exp.ClusteredByProperty, 2057 expressions=expressions, 2058 sorted_by=sorted_by, 2059 buckets=buckets, 2060 ) 2061 2062 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2063 if not self._match_text_seq("GRANTS"): 2064 self._retreat(self._index - 1) 2065 return None 2066 2067 return self.expression(exp.CopyGrantsProperty) 2068 2069 def _parse_freespace(self) -> exp.FreespaceProperty: 2070 self._match(TokenType.EQ) 2071 return 
self.expression( 2072 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2073 ) 2074 2075 def _parse_mergeblockratio( 2076 self, no: bool = False, default: bool = False 2077 ) -> exp.MergeBlockRatioProperty: 2078 if self._match(TokenType.EQ): 2079 return self.expression( 2080 exp.MergeBlockRatioProperty, 2081 this=self._parse_number(), 2082 percent=self._match(TokenType.PERCENT), 2083 ) 2084 2085 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2086 2087 def _parse_datablocksize( 2088 self, 2089 default: t.Optional[bool] = None, 2090 minimum: t.Optional[bool] = None, 2091 maximum: t.Optional[bool] = None, 2092 ) -> exp.DataBlocksizeProperty: 2093 self._match(TokenType.EQ) 2094 size = self._parse_number() 2095 2096 units = None 2097 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2098 units = self._prev.text 2099 2100 return self.expression( 2101 exp.DataBlocksizeProperty, 2102 size=size, 2103 units=units, 2104 default=default, 2105 minimum=minimum, 2106 maximum=maximum, 2107 ) 2108 2109 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2110 self._match(TokenType.EQ) 2111 always = self._match_text_seq("ALWAYS") 2112 manual = self._match_text_seq("MANUAL") 2113 never = self._match_text_seq("NEVER") 2114 default = self._match_text_seq("DEFAULT") 2115 2116 autotemp = None 2117 if self._match_text_seq("AUTOTEMP"): 2118 autotemp = self._parse_schema() 2119 2120 return self.expression( 2121 exp.BlockCompressionProperty, 2122 always=always, 2123 manual=manual, 2124 never=never, 2125 default=default, 2126 autotemp=autotemp, 2127 ) 2128 2129 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2130 index = self._index 2131 no = self._match_text_seq("NO") 2132 concurrent = self._match_text_seq("CONCURRENT") 2133 2134 if not self._match_text_seq("ISOLATED", "LOADING"): 2135 self._retreat(index) 2136 return None 2137 2138 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2139 return self.expression( 2140 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2141 ) 2142 2143 def _parse_locking(self) -> exp.LockingProperty: 2144 if self._match(TokenType.TABLE): 2145 kind = "TABLE" 2146 elif self._match(TokenType.VIEW): 2147 kind = "VIEW" 2148 elif self._match(TokenType.ROW): 2149 kind = "ROW" 2150 elif self._match_text_seq("DATABASE"): 2151 kind = "DATABASE" 2152 else: 2153 kind = None 2154 2155 if kind in ("DATABASE", "TABLE", "VIEW"): 2156 this = self._parse_table_parts() 2157 else: 2158 this = None 2159 2160 if self._match(TokenType.FOR): 2161 for_or_in = "FOR" 2162 elif self._match(TokenType.IN): 2163 for_or_in = "IN" 2164 else: 2165 for_or_in = None 2166 2167 if self._match_text_seq("ACCESS"): 2168 lock_type = "ACCESS" 2169 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2170 lock_type = "EXCLUSIVE" 2171 elif self._match_text_seq("SHARE"): 2172 lock_type = "SHARE" 2173 elif self._match_text_seq("READ"): 2174 lock_type = "READ" 2175 elif self._match_text_seq("WRITE"): 2176 lock_type = "WRITE" 2177 elif self._match_text_seq("CHECKSUM"): 2178 lock_type = "CHECKSUM" 2179 else: 2180 lock_type = None 2181 2182 override = self._match_text_seq("OVERRIDE") 2183 2184 return self.expression( 2185 exp.LockingProperty, 2186 this=this, 2187 kind=kind, 2188 for_or_in=for_or_in, 2189 lock_type=lock_type, 2190 override=override, 2191 ) 2192 2193 def _parse_partition_by(self) -> t.List[exp.Expression]: 2194 if 
self._match(TokenType.PARTITION_BY): 2195 return self._parse_csv(self._parse_conjunction) 2196 return [] 2197 2198 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2199 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2200 if self._match_text_seq("MINVALUE"): 2201 return exp.var("MINVALUE") 2202 if self._match_text_seq("MAXVALUE"): 2203 return exp.var("MAXVALUE") 2204 return self._parse_bitwise() 2205 2206 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2207 expression = None 2208 from_expressions = None 2209 to_expressions = None 2210 2211 if self._match(TokenType.IN): 2212 this = self._parse_wrapped_csv(self._parse_bitwise) 2213 elif self._match(TokenType.FROM): 2214 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2215 self._match_text_seq("TO") 2216 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2217 elif self._match_text_seq("WITH", "(", "MODULUS"): 2218 this = self._parse_number() 2219 self._match_text_seq(",", "REMAINDER") 2220 expression = self._parse_number() 2221 self._match_r_paren() 2222 else: 2223 self.raise_error("Failed to parse partition bound spec.") 2224 2225 return self.expression( 2226 exp.PartitionBoundSpec, 2227 this=this, 2228 expression=expression, 2229 from_expressions=from_expressions, 2230 to_expressions=to_expressions, 2231 ) 2232 2233 # https://www.postgresql.org/docs/current/sql-createtable.html 2234 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2235 if not self._match_text_seq("OF"): 2236 self._retreat(self._index - 1) 2237 return None 2238 2239 this = self._parse_table(schema=True) 2240 2241 if self._match(TokenType.DEFAULT): 2242 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2243 elif self._match_text_seq("FOR", "VALUES"): 2244 expression = self._parse_partition_bound_spec() 2245 else: 2246 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2247 2248 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2249 2250 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2251 self._match(TokenType.EQ) 2252 return self.expression( 2253 exp.PartitionedByProperty, 2254 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2255 ) 2256 2257 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2258 if self._match_text_seq("AND", "STATISTICS"): 2259 statistics = True 2260 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2261 statistics = False 2262 else: 2263 statistics = None 2264 2265 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2266 2267 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2268 if self._match_text_seq("SQL"): 2269 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2270 return None 2271 2272 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2273 if self._match_text_seq("SQL", "DATA"): 2274 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2275 return None 2276 2277 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2278 if self._match_text_seq("PRIMARY", "INDEX"): 2279 return exp.NoPrimaryIndexProperty() 2280 if self._match_text_seq("SQL"): 2281 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2282 return None 2283 2284 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2285 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2286 return exp.OnCommitProperty() 
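        # ON COMMIT DELETE ROWS is the complement: the temporary table's rows are
        # cleared at the end of each transaction instead of being preserved.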
2287 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2288 return exp.OnCommitProperty(delete=True) 2289 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2290 2291 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2292 if self._match_text_seq("SQL", "DATA"): 2293 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2294 return None 2295 2296 def _parse_distkey(self) -> exp.DistKeyProperty: 2297 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2298 2299 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2300 table = self._parse_table(schema=True) 2301 2302 options = [] 2303 while self._match_texts(("INCLUDING", "EXCLUDING")): 2304 this = self._prev.text.upper() 2305 2306 id_var = self._parse_id_var() 2307 if not id_var: 2308 return None 2309 2310 options.append( 2311 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2312 ) 2313 2314 return self.expression(exp.LikeProperty, this=table, expressions=options) 2315 2316 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2317 return self.expression( 2318 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2319 ) 2320 2321 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2322 self._match(TokenType.EQ) 2323 return self.expression( 2324 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2325 ) 2326 2327 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2328 self._match_text_seq("WITH", "CONNECTION") 2329 return self.expression( 2330 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2331 ) 2332 2333 def _parse_returns(self) -> exp.ReturnsProperty: 2334 value: t.Optional[exp.Expression] 2335 null = None 2336 is_table = self._match(TokenType.TABLE) 2337 2338 if is_table: 2339 if self._match(TokenType.LT): 2340 value = self.expression( 2341 exp.Schema, 2342 this="TABLE", 2343 expressions=self._parse_csv(self._parse_struct_types), 2344 ) 2345 if not self._match(TokenType.GT): 2346 self.raise_error("Expecting >") 2347 else: 2348 value = self._parse_schema(exp.var("TABLE")) 2349 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2350 null = True 2351 value = None 2352 else: 2353 value = self._parse_types() 2354 2355 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2356 2357 def _parse_describe(self) -> exp.Describe: 2358 kind = self._match_set(self.CREATABLES) and self._prev.text 2359 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2360 if self._match(TokenType.DOT): 2361 style = None 2362 self._retreat(self._index - 2) 2363 this = self._parse_table(schema=True) 2364 properties = self._parse_properties() 2365 expressions = properties.expressions if properties else None 2366 return self.expression( 2367 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2368 ) 2369 2370 def _parse_insert(self) -> exp.Insert: 2371 comments = ensure_list(self._prev_comments) 2372 hint = self._parse_hint() 2373 overwrite = self._match(TokenType.OVERWRITE) 2374 ignore = self._match(TokenType.IGNORE) 2375 local = self._match_text_seq("LOCAL") 2376 alternative = None 2377 is_function = None 2378 2379 if self._match_text_seq("DIRECTORY"): 2380 this: t.Optional[exp.Expression] = self.expression( 2381 exp.Directory, 2382 this=self._parse_var_or_string(), 2383 
local=local, 2384 row_format=self._parse_row_format(match_row=True), 2385 ) 2386 else: 2387 if self._match(TokenType.OR): 2388 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2389 2390 self._match(TokenType.INTO) 2391 comments += ensure_list(self._prev_comments) 2392 self._match(TokenType.TABLE) 2393 is_function = self._match(TokenType.FUNCTION) 2394 2395 this = ( 2396 self._parse_table(schema=True, parse_partition=True) 2397 if not is_function 2398 else self._parse_function() 2399 ) 2400 2401 returning = self._parse_returning() 2402 2403 return self.expression( 2404 exp.Insert, 2405 comments=comments, 2406 hint=hint, 2407 is_function=is_function, 2408 this=this, 2409 stored=self._match_text_seq("STORED") and self._parse_stored(), 2410 by_name=self._match_text_seq("BY", "NAME"), 2411 exists=self._parse_exists(), 2412 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2413 and self._parse_conjunction(), 2414 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2415 conflict=self._parse_on_conflict(), 2416 returning=returning or self._parse_returning(), 2417 overwrite=overwrite, 2418 alternative=alternative, 2419 ignore=ignore, 2420 ) 2421 2422 def _parse_kill(self) -> exp.Kill: 2423 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2424 2425 return self.expression( 2426 exp.Kill, 2427 this=self._parse_primary(), 2428 kind=kind, 2429 ) 2430 2431 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2432 conflict = self._match_text_seq("ON", "CONFLICT") 2433 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2434 2435 if not conflict and not duplicate: 2436 return None 2437 2438 conflict_keys = None 2439 constraint = None 2440 2441 if conflict: 2442 if self._match_text_seq("ON", "CONSTRAINT"): 2443 constraint = self._parse_id_var() 2444 elif self._match(TokenType.L_PAREN): 2445 conflict_keys = self._parse_csv(self._parse_id_var) 2446 self._match_r_paren() 2447 2448 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2449 if self._prev.token_type == TokenType.UPDATE: 2450 self._match(TokenType.SET) 2451 expressions = self._parse_csv(self._parse_equality) 2452 else: 2453 expressions = None 2454 2455 return self.expression( 2456 exp.OnConflict, 2457 duplicate=duplicate, 2458 expressions=expressions, 2459 action=action, 2460 conflict_keys=conflict_keys, 2461 constraint=constraint, 2462 ) 2463 2464 def _parse_returning(self) -> t.Optional[exp.Returning]: 2465 if not self._match(TokenType.RETURNING): 2466 return None 2467 return self.expression( 2468 exp.Returning, 2469 expressions=self._parse_csv(self._parse_expression), 2470 into=self._match(TokenType.INTO) and self._parse_table_part(), 2471 ) 2472 2473 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2474 if not self._match(TokenType.FORMAT): 2475 return None 2476 return self._parse_row_format() 2477 2478 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2479 index = self._index 2480 with_ = with_ or self._match_text_seq("WITH") 2481 2482 if not self._match(TokenType.SERDE_PROPERTIES): 2483 self._retreat(index) 2484 return None 2485 return self.expression( 2486 exp.SerdeProperties, 2487 **{ # type: ignore 2488 "expressions": self._parse_wrapped_properties(), 2489 "with": with_, 2490 }, 2491 ) 2492 2493 def _parse_row_format( 2494 self, match_row: bool = False 2495 ) -> t.Optional[exp.RowFormatSerdeProperty | 
exp.RowFormatDelimitedProperty]: 2496 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2497 return None 2498 2499 if self._match_text_seq("SERDE"): 2500 this = self._parse_string() 2501 2502 serde_properties = self._parse_serde_properties() 2503 2504 return self.expression( 2505 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2506 ) 2507 2508 self._match_text_seq("DELIMITED") 2509 2510 kwargs = {} 2511 2512 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2513 kwargs["fields"] = self._parse_string() 2514 if self._match_text_seq("ESCAPED", "BY"): 2515 kwargs["escaped"] = self._parse_string() 2516 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2517 kwargs["collection_items"] = self._parse_string() 2518 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2519 kwargs["map_keys"] = self._parse_string() 2520 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2521 kwargs["lines"] = self._parse_string() 2522 if self._match_text_seq("NULL", "DEFINED", "AS"): 2523 kwargs["null"] = self._parse_string() 2524 2525 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2526 2527 def _parse_load(self) -> exp.LoadData | exp.Command: 2528 if self._match_text_seq("DATA"): 2529 local = self._match_text_seq("LOCAL") 2530 self._match_text_seq("INPATH") 2531 inpath = self._parse_string() 2532 overwrite = self._match(TokenType.OVERWRITE) 2533 self._match_pair(TokenType.INTO, TokenType.TABLE) 2534 2535 return self.expression( 2536 exp.LoadData, 2537 this=self._parse_table(schema=True), 2538 local=local, 2539 overwrite=overwrite, 2540 inpath=inpath, 2541 partition=self._parse_partition(), 2542 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2543 serde=self._match_text_seq("SERDE") and self._parse_string(), 2544 ) 2545 return self._parse_as_command(self._prev) 2546 2547 def _parse_delete(self) -> exp.Delete: 2548 # This handles MySQL's "Multiple-Table Syntax" 2549 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2550 tables = None 2551 comments = self._prev_comments 2552 if not self._match(TokenType.FROM, advance=False): 2553 tables = self._parse_csv(self._parse_table) or None 2554 2555 returning = self._parse_returning() 2556 2557 return self.expression( 2558 exp.Delete, 2559 comments=comments, 2560 tables=tables, 2561 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2562 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2563 where=self._parse_where(), 2564 returning=returning or self._parse_returning(), 2565 limit=self._parse_limit(), 2566 ) 2567 2568 def _parse_update(self) -> exp.Update: 2569 comments = self._prev_comments 2570 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2571 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2572 returning = self._parse_returning() 2573 return self.expression( 2574 exp.Update, 2575 comments=comments, 2576 **{ # type: ignore 2577 "this": this, 2578 "expressions": expressions, 2579 "from": self._parse_from(joins=True), 2580 "where": self._parse_where(), 2581 "returning": returning or self._parse_returning(), 2582 "order": self._parse_order(), 2583 "limit": self._parse_limit(), 2584 }, 2585 ) 2586 2587 def _parse_uncache(self) -> exp.Uncache: 2588 if not self._match(TokenType.TABLE): 2589 self.raise_error("Expecting TABLE after UNCACHE") 2590 2591 return self.expression( 2592 exp.Uncache, exists=self._parse_exists(), 
this=self._parse_table(schema=True) 2593 ) 2594 2595 def _parse_cache(self) -> exp.Cache: 2596 lazy = self._match_text_seq("LAZY") 2597 self._match(TokenType.TABLE) 2598 table = self._parse_table(schema=True) 2599 2600 options = [] 2601 if self._match_text_seq("OPTIONS"): 2602 self._match_l_paren() 2603 k = self._parse_string() 2604 self._match(TokenType.EQ) 2605 v = self._parse_string() 2606 options = [k, v] 2607 self._match_r_paren() 2608 2609 self._match(TokenType.ALIAS) 2610 return self.expression( 2611 exp.Cache, 2612 this=table, 2613 lazy=lazy, 2614 options=options, 2615 expression=self._parse_select(nested=True), 2616 ) 2617 2618 def _parse_partition(self) -> t.Optional[exp.Partition]: 2619 if not self._match(TokenType.PARTITION): 2620 return None 2621 2622 return self.expression( 2623 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2624 ) 2625 2626 def _parse_value(self) -> t.Optional[exp.Tuple]: 2627 if self._match(TokenType.L_PAREN): 2628 expressions = self._parse_csv(self._parse_expression) 2629 self._match_r_paren() 2630 return self.expression(exp.Tuple, expressions=expressions) 2631 2632 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2633 expression = self._parse_expression() 2634 if expression: 2635 return self.expression(exp.Tuple, expressions=[expression]) 2636 return None 2637 2638 def _parse_projections(self) -> t.List[exp.Expression]: 2639 return self._parse_expressions() 2640 2641 def _parse_select( 2642 self, 2643 nested: bool = False, 2644 table: bool = False, 2645 parse_subquery_alias: bool = True, 2646 parse_set_operation: bool = True, 2647 ) -> t.Optional[exp.Expression]: 2648 cte = self._parse_with() 2649 2650 if cte: 2651 this = self._parse_statement() 2652 2653 if not this: 2654 self.raise_error("Failed to parse any statement following CTE") 2655 return cte 2656 2657 if "with" in this.arg_types: 2658 this.set("with", cte) 2659 else: 2660 self.raise_error(f"{this.key} does not support CTE") 2661 this = cte 2662 2663 return this 2664 2665 # duckdb supports leading with FROM x 2666 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2667 2668 if self._match(TokenType.SELECT): 2669 comments = self._prev_comments 2670 2671 hint = self._parse_hint() 2672 all_ = self._match(TokenType.ALL) 2673 distinct = self._match_set(self.DISTINCT_TOKENS) 2674 2675 kind = ( 2676 self._match(TokenType.ALIAS) 2677 and self._match_texts(("STRUCT", "VALUE")) 2678 and self._prev.text.upper() 2679 ) 2680 2681 if distinct: 2682 distinct = self.expression( 2683 exp.Distinct, 2684 on=self._parse_value() if self._match(TokenType.ON) else None, 2685 ) 2686 2687 if all_ and distinct: 2688 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2689 2690 limit = self._parse_limit(top=True) 2691 projections = self._parse_projections() 2692 2693 this = self.expression( 2694 exp.Select, 2695 kind=kind, 2696 hint=hint, 2697 distinct=distinct, 2698 expressions=projections, 2699 limit=limit, 2700 ) 2701 this.comments = comments 2702 2703 into = self._parse_into() 2704 if into: 2705 this.set("into", into) 2706 2707 if not from_: 2708 from_ = self._parse_from() 2709 2710 if from_: 2711 this.set("from", from_) 2712 2713 this = self._parse_query_modifiers(this) 2714 elif (table or nested) and self._match(TokenType.L_PAREN): 2715 if self._match(TokenType.PIVOT): 2716 this = self._parse_simplified_pivot() 2717 elif self._match(TokenType.FROM): 2718 this = exp.select("*").from_( 2719 t.cast(exp.From, 
self._parse_from(skip_from_token=True)) 2720 ) 2721 else: 2722 this = ( 2723 self._parse_table() 2724 if table 2725 else self._parse_select(nested=True, parse_set_operation=False) 2726 ) 2727 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2728 2729 self._match_r_paren() 2730 2731 # We return early here so that the UNION isn't attached to the subquery by the 2732 # following call to _parse_set_operations, but instead becomes the parent node 2733 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2734 elif self._match(TokenType.VALUES, advance=False): 2735 this = self._parse_derived_table_values() 2736 elif from_: 2737 this = exp.select("*").from_(from_.this, copy=False) 2738 else: 2739 this = None 2740 2741 if parse_set_operation: 2742 return self._parse_set_operations(this) 2743 return this 2744 2745 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2746 if not skip_with_token and not self._match(TokenType.WITH): 2747 return None 2748 2749 comments = self._prev_comments 2750 recursive = self._match(TokenType.RECURSIVE) 2751 2752 expressions = [] 2753 while True: 2754 expressions.append(self._parse_cte()) 2755 2756 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2757 break 2758 else: 2759 self._match(TokenType.WITH) 2760 2761 return self.expression( 2762 exp.With, comments=comments, expressions=expressions, recursive=recursive 2763 ) 2764 2765 def _parse_cte(self) -> exp.CTE: 2766 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2767 if not alias or not alias.this: 2768 self.raise_error("Expected CTE to have alias") 2769 2770 self._match(TokenType.ALIAS) 2771 2772 if self._match_text_seq("NOT", "MATERIALIZED"): 2773 materialized = False 2774 elif self._match_text_seq("MATERIALIZED"): 2775 materialized = True 2776 else: 2777 materialized = None 2778 2779 return self.expression( 2780 exp.CTE, 2781 this=self._parse_wrapped(self._parse_statement), 2782 alias=alias, 2783 materialized=materialized, 2784 ) 2785 2786 def _parse_table_alias( 2787 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2788 ) -> t.Optional[exp.TableAlias]: 2789 any_token = self._match(TokenType.ALIAS) 2790 alias = ( 2791 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2792 or self._parse_string_as_identifier() 2793 ) 2794 2795 index = self._index 2796 if self._match(TokenType.L_PAREN): 2797 columns = self._parse_csv(self._parse_function_parameter) 2798 self._match_r_paren() if columns else self._retreat(index) 2799 else: 2800 columns = None 2801 2802 if not alias and not columns: 2803 return None 2804 2805 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2806 2807 # We bubble up comments from the Identifier to the TableAlias 2808 if isinstance(alias, exp.Identifier): 2809 table_alias.add_comments(alias.pop_comments()) 2810 2811 return table_alias 2812 2813 def _parse_subquery( 2814 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2815 ) -> t.Optional[exp.Subquery]: 2816 if not this: 2817 return None 2818 2819 return self.expression( 2820 exp.Subquery, 2821 this=this, 2822 pivots=self._parse_pivots(), 2823 alias=self._parse_table_alias() if parse_alias else None, 2824 ) 2825 2826 def _implicit_unnests_to_explicit(self, this: E) -> E: 2827 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2828 2829 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2830 for i, join in 
enumerate(this.args.get("joins") or []): 2831 table = join.this 2832 normalized_table = table.copy() 2833 normalized_table.meta["maybe_column"] = True 2834 normalized_table = _norm(normalized_table, dialect=self.dialect) 2835 2836 if isinstance(table, exp.Table) and not join.args.get("on"): 2837 if normalized_table.parts[0].name in refs: 2838 table_as_column = table.to_column() 2839 unnest = exp.Unnest(expressions=[table_as_column]) 2840 2841 # Table.to_column creates a parent Alias node that we want to convert to 2842 # a TableAlias and attach to the Unnest, so it matches the parser's output 2843 if isinstance(table.args.get("alias"), exp.TableAlias): 2844 table_as_column.replace(table_as_column.this) 2845 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2846 2847 table.replace(unnest) 2848 2849 refs.add(normalized_table.alias_or_name) 2850 2851 return this 2852 2853 def _parse_query_modifiers( 2854 self, this: t.Optional[exp.Expression] 2855 ) -> t.Optional[exp.Expression]: 2856 if isinstance(this, (exp.Query, exp.Table)): 2857 for join in self._parse_joins(): 2858 this.append("joins", join) 2859 for lateral in iter(self._parse_lateral, None): 2860 this.append("laterals", lateral) 2861 2862 while True: 2863 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2864 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2865 key, expression = parser(self) 2866 2867 if expression: 2868 this.set(key, expression) 2869 if key == "limit": 2870 offset = expression.args.pop("offset", None) 2871 2872 if offset: 2873 offset = exp.Offset(expression=offset) 2874 this.set("offset", offset) 2875 2876 limit_by_expressions = expression.expressions 2877 expression.set("expressions", None) 2878 offset.set("expressions", limit_by_expressions) 2879 continue 2880 break 2881 2882 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2883 this = self._implicit_unnests_to_explicit(this) 2884 2885 return this 2886 2887 def _parse_hint(self) -> t.Optional[exp.Hint]: 2888 if self._match(TokenType.HINT): 2889 hints = [] 2890 for hint in iter( 2891 lambda: self._parse_csv( 2892 lambda: self._parse_function() or self._parse_var(upper=True) 2893 ), 2894 [], 2895 ): 2896 hints.extend(hint) 2897 2898 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2899 self.raise_error("Expected */ after HINT") 2900 2901 return self.expression(exp.Hint, expressions=hints) 2902 2903 return None 2904 2905 def _parse_into(self) -> t.Optional[exp.Into]: 2906 if not self._match(TokenType.INTO): 2907 return None 2908 2909 temp = self._match(TokenType.TEMPORARY) 2910 unlogged = self._match_text_seq("UNLOGGED") 2911 self._match(TokenType.TABLE) 2912 2913 return self.expression( 2914 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2915 ) 2916 2917 def _parse_from( 2918 self, joins: bool = False, skip_from_token: bool = False 2919 ) -> t.Optional[exp.From]: 2920 if not skip_from_token and not self._match(TokenType.FROM): 2921 return None 2922 2923 return self.expression( 2924 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2925 ) 2926 2927 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2928 return self.expression( 2929 exp.MatchRecognizeMeasure, 2930 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2931 this=self._parse_expression(), 2932 ) 2933 2934 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2935 if not self._match(TokenType.MATCH_RECOGNIZE): 
2936 return None 2937 2938 self._match_l_paren() 2939 2940 partition = self._parse_partition_by() 2941 order = self._parse_order() 2942 2943 measures = ( 2944 self._parse_csv(self._parse_match_recognize_measure) 2945 if self._match_text_seq("MEASURES") 2946 else None 2947 ) 2948 2949 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2950 rows = exp.var("ONE ROW PER MATCH") 2951 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2952 text = "ALL ROWS PER MATCH" 2953 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2954 text += " SHOW EMPTY MATCHES" 2955 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2956 text += " OMIT EMPTY MATCHES" 2957 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2958 text += " WITH UNMATCHED ROWS" 2959 rows = exp.var(text) 2960 else: 2961 rows = None 2962 2963 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2964 text = "AFTER MATCH SKIP" 2965 if self._match_text_seq("PAST", "LAST", "ROW"): 2966 text += " PAST LAST ROW" 2967 elif self._match_text_seq("TO", "NEXT", "ROW"): 2968 text += " TO NEXT ROW" 2969 elif self._match_text_seq("TO", "FIRST"): 2970 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2971 elif self._match_text_seq("TO", "LAST"): 2972 text += f" TO LAST {self._advance_any().text}" # type: ignore 2973 after = exp.var(text) 2974 else: 2975 after = None 2976 2977 if self._match_text_seq("PATTERN"): 2978 self._match_l_paren() 2979 2980 if not self._curr: 2981 self.raise_error("Expecting )", self._curr) 2982 2983 paren = 1 2984 start = self._curr 2985 2986 while self._curr and paren > 0: 2987 if self._curr.token_type == TokenType.L_PAREN: 2988 paren += 1 2989 if self._curr.token_type == TokenType.R_PAREN: 2990 paren -= 1 2991 2992 end = self._prev 2993 self._advance() 2994 2995 if paren > 0: 2996 self.raise_error("Expecting )", self._curr) 2997 2998 pattern = exp.var(self._find_sql(start, end)) 2999 else: 3000 pattern = None 3001 3002 define = ( 3003 self._parse_csv(self._parse_name_as_expression) 3004 if self._match_text_seq("DEFINE") 3005 else None 3006 ) 3007 3008 self._match_r_paren() 3009 3010 return self.expression( 3011 exp.MatchRecognize, 3012 partition_by=partition, 3013 order=order, 3014 measures=measures, 3015 rows=rows, 3016 after=after, 3017 pattern=pattern, 3018 define=define, 3019 alias=self._parse_table_alias(), 3020 ) 3021 3022 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3023 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3024 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3025 cross_apply = False 3026 3027 if cross_apply is not None: 3028 this = self._parse_select(table=True) 3029 view = None 3030 outer = None 3031 elif self._match(TokenType.LATERAL): 3032 this = self._parse_select(table=True) 3033 view = self._match(TokenType.VIEW) 3034 outer = self._match(TokenType.OUTER) 3035 else: 3036 return None 3037 3038 if not this: 3039 this = ( 3040 self._parse_unnest() 3041 or self._parse_function() 3042 or self._parse_id_var(any_token=False) 3043 ) 3044 3045 while self._match(TokenType.DOT): 3046 this = exp.Dot( 3047 this=this, 3048 expression=self._parse_function() or self._parse_id_var(any_token=False), 3049 ) 3050 3051 if view: 3052 table = self._parse_id_var(any_token=False) 3053 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3054 table_alias: t.Optional[exp.TableAlias] = self.expression( 3055 exp.TableAlias, this=table, columns=columns 3056 ) 3057 elif isinstance(this, (exp.Subquery, exp.Unnest)) and 
this.alias: 3058 # We move the alias from the lateral's child node to the lateral itself 3059 table_alias = this.args["alias"].pop() 3060 else: 3061 table_alias = self._parse_table_alias() 3062 3063 return self.expression( 3064 exp.Lateral, 3065 this=this, 3066 view=view, 3067 outer=outer, 3068 alias=table_alias, 3069 cross_apply=cross_apply, 3070 ) 3071 3072 def _parse_join_parts( 3073 self, 3074 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3075 return ( 3076 self._match_set(self.JOIN_METHODS) and self._prev, 3077 self._match_set(self.JOIN_SIDES) and self._prev, 3078 self._match_set(self.JOIN_KINDS) and self._prev, 3079 ) 3080 3081 def _parse_join( 3082 self, skip_join_token: bool = False, parse_bracket: bool = False 3083 ) -> t.Optional[exp.Join]: 3084 if self._match(TokenType.COMMA): 3085 return self.expression(exp.Join, this=self._parse_table()) 3086 3087 index = self._index 3088 method, side, kind = self._parse_join_parts() 3089 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3090 join = self._match(TokenType.JOIN) 3091 3092 if not skip_join_token and not join: 3093 self._retreat(index) 3094 kind = None 3095 method = None 3096 side = None 3097 3098 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3099 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3100 3101 if not skip_join_token and not join and not outer_apply and not cross_apply: 3102 return None 3103 3104 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3105 3106 if method: 3107 kwargs["method"] = method.text 3108 if side: 3109 kwargs["side"] = side.text 3110 if kind: 3111 kwargs["kind"] = kind.text 3112 if hint: 3113 kwargs["hint"] = hint 3114 3115 if self._match(TokenType.MATCH_CONDITION): 3116 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3117 3118 if self._match(TokenType.ON): 3119 kwargs["on"] = self._parse_conjunction() 3120 elif self._match(TokenType.USING): 3121 kwargs["using"] = self._parse_wrapped_id_vars() 3122 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3123 kind and kind.token_type == TokenType.CROSS 3124 ): 3125 index = self._index 3126 joins: t.Optional[list] = list(self._parse_joins()) 3127 3128 if joins and self._match(TokenType.ON): 3129 kwargs["on"] = self._parse_conjunction() 3130 elif joins and self._match(TokenType.USING): 3131 kwargs["using"] = self._parse_wrapped_id_vars() 3132 else: 3133 joins = None 3134 self._retreat(index) 3135 3136 kwargs["this"].set("joins", joins if joins else None) 3137 3138 comments = [c for token in (method, side, kind) if token for c in token.comments] 3139 return self.expression(exp.Join, comments=comments, **kwargs) 3140 3141 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3142 this = self._parse_conjunction() 3143 3144 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3145 return this 3146 3147 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3148 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3149 3150 return this 3151 3152 def _parse_index_params(self) -> exp.IndexParameters: 3153 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3154 3155 if self._match(TokenType.L_PAREN, advance=False): 3156 columns = self._parse_wrapped_csv(self._parse_with_operator) 3157 else: 3158 columns = None 3159 3160 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3161 partition_by 
= self._parse_partition_by() 3162 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3163 tablespace = ( 3164 self._parse_var(any_token=True) 3165 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3166 else None 3167 ) 3168 where = self._parse_where() 3169 3170 return self.expression( 3171 exp.IndexParameters, 3172 using=using, 3173 columns=columns, 3174 include=include, 3175 partition_by=partition_by, 3176 where=where, 3177 with_storage=with_storage, 3178 tablespace=tablespace, 3179 ) 3180 3181 def _parse_index( 3182 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3183 ) -> t.Optional[exp.Index]: 3184 if index or anonymous: 3185 unique = None 3186 primary = None 3187 amp = None 3188 3189 self._match(TokenType.ON) 3190 self._match(TokenType.TABLE) # hive 3191 table = self._parse_table_parts(schema=True) 3192 else: 3193 unique = self._match(TokenType.UNIQUE) 3194 primary = self._match_text_seq("PRIMARY") 3195 amp = self._match_text_seq("AMP") 3196 3197 if not self._match(TokenType.INDEX): 3198 return None 3199 3200 index = self._parse_id_var() 3201 table = None 3202 3203 params = self._parse_index_params() 3204 3205 return self.expression( 3206 exp.Index, 3207 this=index, 3208 table=table, 3209 unique=unique, 3210 primary=primary, 3211 amp=amp, 3212 params=params, 3213 ) 3214 3215 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3216 hints: t.List[exp.Expression] = [] 3217 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3218 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3219 hints.append( 3220 self.expression( 3221 exp.WithTableHint, 3222 expressions=self._parse_csv( 3223 lambda: self._parse_function() or self._parse_var(any_token=True) 3224 ), 3225 ) 3226 ) 3227 self._match_r_paren() 3228 else: 3229 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3230 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3231 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3232 3233 self._match_texts(("INDEX", "KEY")) 3234 if self._match(TokenType.FOR): 3235 hint.set("target", self._advance_any() and self._prev.text.upper()) 3236 3237 hint.set("expressions", self._parse_wrapped_id_vars()) 3238 hints.append(hint) 3239 3240 return hints or None 3241 3242 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3243 return ( 3244 (not schema and self._parse_function(optional_parens=False)) 3245 or self._parse_id_var(any_token=False) 3246 or self._parse_string_as_identifier() 3247 or self._parse_placeholder() 3248 ) 3249 3250 def _parse_table_parts( 3251 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3252 ) -> exp.Table: 3253 catalog = None 3254 db = None 3255 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3256 3257 while self._match(TokenType.DOT): 3258 if catalog: 3259 # This allows nesting the table in arbitrarily many dot expressions if needed 3260 table = self.expression( 3261 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3262 ) 3263 else: 3264 catalog = db 3265 db = table 3266 # "" used for tsql FROM a..b case 3267 table = self._parse_table_part(schema=schema) or "" 3268 3269 if ( 3270 wildcard 3271 and self._is_connected() 3272 and (isinstance(table, exp.Identifier) or not table) 3273 and self._match(TokenType.STAR) 3274 ): 3275 if isinstance(table, exp.Identifier): 3276 table.args["this"] += "*" 3277 else: 3278 table = 
exp.Identifier(this="*") 3279 3280 # We bubble up comments from the Identifier to the Table 3281 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3282 3283 if is_db_reference: 3284 catalog = db 3285 db = table 3286 table = None 3287 3288 if not table and not is_db_reference: 3289 self.raise_error(f"Expected table name but got {self._curr}") 3290 if not db and is_db_reference: 3291 self.raise_error(f"Expected database name but got {self._curr}") 3292 3293 return self.expression( 3294 exp.Table, 3295 comments=comments, 3296 this=table, 3297 db=db, 3298 catalog=catalog, 3299 pivots=self._parse_pivots(), 3300 ) 3301 3302 def _parse_table( 3303 self, 3304 schema: bool = False, 3305 joins: bool = False, 3306 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3307 parse_bracket: bool = False, 3308 is_db_reference: bool = False, 3309 parse_partition: bool = False, 3310 ) -> t.Optional[exp.Expression]: 3311 lateral = self._parse_lateral() 3312 if lateral: 3313 return lateral 3314 3315 unnest = self._parse_unnest() 3316 if unnest: 3317 return unnest 3318 3319 values = self._parse_derived_table_values() 3320 if values: 3321 return values 3322 3323 subquery = self._parse_select(table=True) 3324 if subquery: 3325 if not subquery.args.get("pivots"): 3326 subquery.set("pivots", self._parse_pivots()) 3327 return subquery 3328 3329 bracket = parse_bracket and self._parse_bracket(None) 3330 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3331 3332 only = self._match(TokenType.ONLY) 3333 3334 this = t.cast( 3335 exp.Expression, 3336 bracket 3337 or self._parse_bracket( 3338 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3339 ), 3340 ) 3341 3342 if only: 3343 this.set("only", only) 3344 3345 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3346 self._match_text_seq("*") 3347 3348 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3349 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3350 this.set("partition", self._parse_partition()) 3351 3352 if schema: 3353 return self._parse_schema(this=this) 3354 3355 version = self._parse_version() 3356 3357 if version: 3358 this.set("version", version) 3359 3360 if self.dialect.ALIAS_POST_TABLESAMPLE: 3361 table_sample = self._parse_table_sample() 3362 3363 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3364 if alias: 3365 this.set("alias", alias) 3366 3367 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3368 return self.expression( 3369 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3370 ) 3371 3372 this.set("hints", self._parse_table_hints()) 3373 3374 if not this.args.get("pivots"): 3375 this.set("pivots", self._parse_pivots()) 3376 3377 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3378 table_sample = self._parse_table_sample() 3379 3380 if table_sample: 3381 table_sample.set("this", this) 3382 this = table_sample 3383 3384 if joins: 3385 for join in self._parse_joins(): 3386 this.append("joins", join) 3387 3388 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3389 this.set("ordinality", True) 3390 this.set("alias", self._parse_table_alias()) 3391 3392 return this 3393 3394 def _parse_version(self) -> t.Optional[exp.Version]: 3395 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3396 this = "TIMESTAMP" 3397 elif self._match(TokenType.VERSION_SNAPSHOT): 3398 this = "VERSION" 3399 else: 3400 return None 3401 3402 
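        # What follows the snapshot keyword is either a range (FROM ... TO ... or
        # BETWEEN ... AND ...), CONTAINED IN (...), ALL, or a point-in-time AS OF
        # expression. A hedged sketch of the nodes this produces, for the AS OF case:
        #   exp.Version(this="TIMESTAMP", kind="AS OF", expression=<parsed type>)
        # and for a range:
        #   exp.Version(this="VERSION", kind="FROM", expression=exp.Tuple(expressions=[start, end]))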
if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3403 kind = self._prev.text.upper() 3404 start = self._parse_bitwise() 3405 self._match_texts(("TO", "AND")) 3406 end = self._parse_bitwise() 3407 expression: t.Optional[exp.Expression] = self.expression( 3408 exp.Tuple, expressions=[start, end] 3409 ) 3410 elif self._match_text_seq("CONTAINED", "IN"): 3411 kind = "CONTAINED IN" 3412 expression = self.expression( 3413 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3414 ) 3415 elif self._match(TokenType.ALL): 3416 kind = "ALL" 3417 expression = None 3418 else: 3419 self._match_text_seq("AS", "OF") 3420 kind = "AS OF" 3421 expression = self._parse_type() 3422 3423 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3424 3425 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3426 if not self._match(TokenType.UNNEST): 3427 return None 3428 3429 expressions = self._parse_wrapped_csv(self._parse_equality) 3430 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3431 3432 alias = self._parse_table_alias() if with_alias else None 3433 3434 if alias: 3435 if self.dialect.UNNEST_COLUMN_ONLY: 3436 if alias.args.get("columns"): 3437 self.raise_error("Unexpected extra column alias in unnest.") 3438 3439 alias.set("columns", [alias.this]) 3440 alias.set("this", None) 3441 3442 columns = alias.args.get("columns") or [] 3443 if offset and len(expressions) < len(columns): 3444 offset = columns.pop() 3445 3446 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3447 self._match(TokenType.ALIAS) 3448 offset = self._parse_id_var( 3449 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3450 ) or exp.to_identifier("offset") 3451 3452 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3453 3454 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3455 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3456 if not is_derived and not self._match_text_seq("VALUES"): 3457 return None 3458 3459 expressions = self._parse_csv(self._parse_value) 3460 alias = self._parse_table_alias() 3461 3462 if is_derived: 3463 self._match_r_paren() 3464 3465 return self.expression( 3466 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3467 ) 3468 3469 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3470 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3471 as_modifier and self._match_text_seq("USING", "SAMPLE") 3472 ): 3473 return None 3474 3475 bucket_numerator = None 3476 bucket_denominator = None 3477 bucket_field = None 3478 percent = None 3479 size = None 3480 seed = None 3481 3482 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3483 matched_l_paren = self._match(TokenType.L_PAREN) 3484 3485 if self.TABLESAMPLE_CSV: 3486 num = None 3487 expressions = self._parse_csv(self._parse_primary) 3488 else: 3489 expressions = None 3490 num = ( 3491 self._parse_factor() 3492 if self._match(TokenType.NUMBER, advance=False) 3493 else self._parse_primary() or self._parse_placeholder() 3494 ) 3495 3496 if self._match_text_seq("BUCKET"): 3497 bucket_numerator = self._parse_number() 3498 self._match_text_seq("OUT", "OF") 3499 bucket_denominator = self._parse_number() 3500 self._match(TokenType.ON) 3501 bucket_field = self._parse_field() 3502 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3503 percent = num 3504 elif self._match(TokenType.ROWS) or
not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3505 size = num 3506 else: 3507 percent = num 3508 3509 if matched_l_paren: 3510 self._match_r_paren() 3511 3512 if self._match(TokenType.L_PAREN): 3513 method = self._parse_var(upper=True) 3514 seed = self._match(TokenType.COMMA) and self._parse_number() 3515 self._match_r_paren() 3516 elif self._match_texts(("SEED", "REPEATABLE")): 3517 seed = self._parse_wrapped(self._parse_number) 3518 3519 if not method and self.DEFAULT_SAMPLING_METHOD: 3520 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3521 3522 return self.expression( 3523 exp.TableSample, 3524 expressions=expressions, 3525 method=method, 3526 bucket_numerator=bucket_numerator, 3527 bucket_denominator=bucket_denominator, 3528 bucket_field=bucket_field, 3529 percent=percent, 3530 size=size, 3531 seed=seed, 3532 ) 3533 3534 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3535 return list(iter(self._parse_pivot, None)) or None 3536 3537 def _parse_joins(self) -> t.Iterator[exp.Join]: 3538 return iter(self._parse_join, None) 3539 3540 # https://duckdb.org/docs/sql/statements/pivot 3541 def _parse_simplified_pivot(self) -> exp.Pivot: 3542 def _parse_on() -> t.Optional[exp.Expression]: 3543 this = self._parse_bitwise() 3544 return self._parse_in(this) if self._match(TokenType.IN) else this 3545 3546 this = self._parse_table() 3547 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3548 using = self._match(TokenType.USING) and self._parse_csv( 3549 lambda: self._parse_alias(self._parse_function()) 3550 ) 3551 group = self._parse_group() 3552 return self.expression( 3553 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3554 ) 3555 3556 def _parse_pivot_in(self) -> exp.In: 3557 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3558 this = self._parse_conjunction() 3559 3560 self._match(TokenType.ALIAS) 3561 alias = self._parse_field() 3562 if alias: 3563 return self.expression(exp.PivotAlias, this=this, alias=alias) 3564 3565 return this 3566 3567 value = self._parse_column() 3568 3569 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3570 self.raise_error("Expecting IN (") 3571 3572 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3573 3574 self._match_r_paren() 3575 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3576 3577 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3578 index = self._index 3579 include_nulls = None 3580 3581 if self._match(TokenType.PIVOT): 3582 unpivot = False 3583 elif self._match(TokenType.UNPIVOT): 3584 unpivot = True 3585 3586 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3587 if self._match_text_seq("INCLUDE", "NULLS"): 3588 include_nulls = True 3589 elif self._match_text_seq("EXCLUDE", "NULLS"): 3590 include_nulls = False 3591 else: 3592 return None 3593 3594 expressions = [] 3595 3596 if not self._match(TokenType.L_PAREN): 3597 self._retreat(index) 3598 return None 3599 3600 if unpivot: 3601 expressions = self._parse_csv(self._parse_column) 3602 else: 3603 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3604 3605 if not expressions: 3606 self.raise_error("Failed to parse PIVOT's aggregation list") 3607 3608 if not self._match(TokenType.FOR): 3609 self.raise_error("Expecting FOR") 3610 3611 field = self._parse_pivot_in() 3612 3613 self._match_r_paren() 3614 3615 pivot = self.expression( 3616 exp.Pivot, 3617 expressions=expressions, 3618 field=field, 3619 
unpivot=unpivot, 3620 include_nulls=include_nulls, 3621 ) 3622 3623 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3624 pivot.set("alias", self._parse_table_alias()) 3625 3626 if not unpivot: 3627 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3628 3629 columns: t.List[exp.Expression] = [] 3630 for fld in pivot.args["field"].expressions: 3631 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3632 for name in names: 3633 if self.PREFIXED_PIVOT_COLUMNS: 3634 name = f"{name}_{field_name}" if name else field_name 3635 else: 3636 name = f"{field_name}_{name}" if name else field_name 3637 3638 columns.append(exp.to_identifier(name)) 3639 3640 pivot.set("columns", columns) 3641 3642 return pivot 3643 3644 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3645 return [agg.alias for agg in aggregations] 3646 3647 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3648 if not skip_where_token and not self._match(TokenType.PREWHERE): 3649 return None 3650 3651 return self.expression( 3652 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3653 ) 3654 3655 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3656 if not skip_where_token and not self._match(TokenType.WHERE): 3657 return None 3658 3659 return self.expression( 3660 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3661 ) 3662 3663 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3664 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3665 return None 3666 3667 elements: t.Dict[str, t.Any] = defaultdict(list) 3668 3669 if self._match(TokenType.ALL): 3670 elements["all"] = True 3671 elif self._match(TokenType.DISTINCT): 3672 elements["all"] = False 3673 3674 while True: 3675 expressions = self._parse_csv( 3676 lambda: None 3677 if self._match(TokenType.ROLLUP, advance=False) 3678 else self._parse_conjunction() 3679 ) 3680 if expressions: 3681 elements["expressions"].extend(expressions) 3682 3683 grouping_sets = self._parse_grouping_sets() 3684 if grouping_sets: 3685 elements["grouping_sets"].extend(grouping_sets) 3686 3687 rollup = None 3688 cube = None 3689 totals = None 3690 3691 index = self._index 3692 with_ = self._match(TokenType.WITH) 3693 if self._match(TokenType.ROLLUP): 3694 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3695 elements["rollup"].extend(ensure_list(rollup)) 3696 3697 if self._match(TokenType.CUBE): 3698 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3699 elements["cube"].extend(ensure_list(cube)) 3700 3701 if self._match_text_seq("TOTALS"): 3702 totals = True 3703 elements["totals"] = True # type: ignore 3704 3705 if not (grouping_sets or rollup or cube or totals): 3706 if with_: 3707 self._retreat(index) 3708 break 3709 3710 return self.expression(exp.Group, **elements) # type: ignore 3711 3712 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3713 if not self._match(TokenType.GROUPING_SETS): 3714 return None 3715 3716 return self._parse_wrapped_csv(self._parse_grouping_set) 3717 3718 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3719 if self._match(TokenType.L_PAREN): 3720 grouping_set = self._parse_csv(self._parse_column) 3721 self._match_r_paren() 3722 return self.expression(exp.Tuple, expressions=grouping_set) 3723 3724 return self._parse_column() 3725 3726 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3727 if not skip_having_token and not self._match(TokenType.HAVING): 3728 return None 3729 return self.expression(exp.Having, this=self._parse_conjunction()) 3730 3731 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3732 if not self._match(TokenType.QUALIFY): 3733 return None 3734 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3735 3736 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3737 if skip_start_token: 3738 start = None 3739 elif self._match(TokenType.START_WITH): 3740 start = self._parse_conjunction() 3741 else: 3742 return None 3743 3744 self._match(TokenType.CONNECT_BY) 3745 nocycle = self._match_text_seq("NOCYCLE") 3746 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3747 exp.Prior, this=self._parse_bitwise() 3748 ) 3749 connect = self._parse_conjunction() 3750 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3751 3752 if not start and self._match(TokenType.START_WITH): 3753 start = self._parse_conjunction() 3754 3755 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3756 3757 def _parse_name_as_expression(self) -> exp.Alias: 3758 return self.expression( 3759 exp.Alias, 3760 alias=self._parse_id_var(any_token=True), 3761 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3762 ) 3763 3764 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3765 if self._match_text_seq("INTERPOLATE"): 3766 return self._parse_wrapped_csv(self._parse_name_as_expression) 3767 return None 3768 3769 def _parse_order( 3770 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3771 ) -> t.Optional[exp.Expression]: 3772 siblings = None 3773 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3774 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3775 return this 3776 3777 siblings = True 3778 3779 return self.expression( 3780 exp.Order, 3781 this=this, 3782 expressions=self._parse_csv(self._parse_ordered), 3783 interpolate=self._parse_interpolate(), 3784 siblings=siblings, 3785 ) 3786 3787 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3788 if not self._match(token): 3789 return None 3790 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3791 3792 def _parse_ordered( 3793 self, parse_method: t.Optional[t.Callable] = None 3794 ) -> t.Optional[exp.Ordered]: 3795 this = parse_method() if parse_method else self._parse_conjunction() 3796 if not this: 3797 return None 3798 3799 asc = self._match(TokenType.ASC) 3800 desc = self._match(TokenType.DESC) or (asc and False) 3801 3802 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3803 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3804 3805 nulls_first = is_nulls_first or False 3806 explicitly_null_ordered = is_nulls_first or is_nulls_last 3807 3808 if ( 3809 not explicitly_null_ordered 3810 and ( 3811 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3812 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3813 ) 3814 and self.dialect.NULL_ORDERING != "nulls_are_last" 3815 ): 3816 nulls_first = True 3817 3818 if self._match_text_seq("WITH", "FILL"): 3819 with_fill = self.expression( 3820 exp.WithFill, 3821 **{ # type: ignore 3822 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3823 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3824 "step": self._match_text_seq("STEP") and 
self._parse_bitwise(), 3825 }, 3826 ) 3827 else: 3828 with_fill = None 3829 3830 return self.expression( 3831 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3832 ) 3833 3834 def _parse_limit( 3835 self, 3836 this: t.Optional[exp.Expression] = None, 3837 top: bool = False, 3838 skip_limit_token: bool = False, 3839 ) -> t.Optional[exp.Expression]: 3840 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3841 comments = self._prev_comments 3842 if top: 3843 limit_paren = self._match(TokenType.L_PAREN) 3844 expression = self._parse_term() if limit_paren else self._parse_number() 3845 3846 if limit_paren: 3847 self._match_r_paren() 3848 else: 3849 expression = self._parse_term() 3850 3851 if self._match(TokenType.COMMA): 3852 offset = expression 3853 expression = self._parse_term() 3854 else: 3855 offset = None 3856 3857 limit_exp = self.expression( 3858 exp.Limit, 3859 this=this, 3860 expression=expression, 3861 offset=offset, 3862 comments=comments, 3863 expressions=self._parse_limit_by(), 3864 ) 3865 3866 return limit_exp 3867 3868 if self._match(TokenType.FETCH): 3869 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3870 direction = self._prev.text.upper() if direction else "FIRST" 3871 3872 count = self._parse_field(tokens=self.FETCH_TOKENS) 3873 percent = self._match(TokenType.PERCENT) 3874 3875 self._match_set((TokenType.ROW, TokenType.ROWS)) 3876 3877 only = self._match_text_seq("ONLY") 3878 with_ties = self._match_text_seq("WITH", "TIES") 3879 3880 if only and with_ties: 3881 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3882 3883 return self.expression( 3884 exp.Fetch, 3885 direction=direction, 3886 count=count, 3887 percent=percent, 3888 with_ties=with_ties, 3889 ) 3890 3891 return this 3892 3893 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3894 if not self._match(TokenType.OFFSET): 3895 return this 3896 3897 count = self._parse_term() 3898 self._match_set((TokenType.ROW, TokenType.ROWS)) 3899 3900 return self.expression( 3901 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3902 ) 3903 3904 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3905 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3906 3907 def _parse_locks(self) -> t.List[exp.Lock]: 3908 locks = [] 3909 while True: 3910 if self._match_text_seq("FOR", "UPDATE"): 3911 update = True 3912 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3913 "LOCK", "IN", "SHARE", "MODE" 3914 ): 3915 update = False 3916 else: 3917 break 3918 3919 expressions = None 3920 if self._match_text_seq("OF"): 3921 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3922 3923 wait: t.Optional[bool | exp.Expression] = None 3924 if self._match_text_seq("NOWAIT"): 3925 wait = True 3926 elif self._match_text_seq("WAIT"): 3927 wait = self._parse_primary() 3928 elif self._match_text_seq("SKIP", "LOCKED"): 3929 wait = False 3930 3931 locks.append( 3932 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3933 ) 3934 3935 return locks 3936 3937 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3938 while this and self._match_set(self.SET_OPERATIONS): 3939 token_type = self._prev.token_type 3940 3941 if token_type == TokenType.UNION: 3942 operation = exp.Union 3943 elif token_type == TokenType.EXCEPT: 3944 operation = exp.Except 3945 
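# SET_OPERATIONS only contains UNION, EXCEPT and INTERSECT, so the remaining case here is INTERSECT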
else: 3946 operation = exp.Intersect 3947 3948 comments = self._prev.comments 3949 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3950 by_name = self._match_text_seq("BY", "NAME") 3951 expression = self._parse_select(nested=True, parse_set_operation=False) 3952 3953 this = self.expression( 3954 operation, 3955 comments=comments, 3956 this=this, 3957 distinct=distinct, 3958 by_name=by_name, 3959 expression=expression, 3960 ) 3961 3962 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3963 expression = this.expression 3964 3965 if expression: 3966 for arg in self.UNION_MODIFIERS: 3967 expr = expression.args.get(arg) 3968 if expr: 3969 this.set(arg, expr.pop()) 3970 3971 return this 3972 3973 def _parse_expression(self) -> t.Optional[exp.Expression]: 3974 return self._parse_alias(self._parse_conjunction()) 3975 3976 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3977 this = self._parse_equality() 3978 3979 if self._match(TokenType.COLON_EQ): 3980 this = self.expression( 3981 exp.PropertyEQ, 3982 this=this, 3983 comments=self._prev_comments, 3984 expression=self._parse_conjunction(), 3985 ) 3986 3987 while self._match_set(self.CONJUNCTION): 3988 this = self.expression( 3989 self.CONJUNCTION[self._prev.token_type], 3990 this=this, 3991 comments=self._prev_comments, 3992 expression=self._parse_equality(), 3993 ) 3994 return this 3995 3996 def _parse_equality(self) -> t.Optional[exp.Expression]: 3997 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3998 3999 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4000 return self._parse_tokens(self._parse_range, self.COMPARISON) 4001 4002 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4003 this = this or self._parse_bitwise() 4004 negate = self._match(TokenType.NOT) 4005 4006 if self._match_set(self.RANGE_PARSERS): 4007 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4008 if not expression: 4009 return this 4010 4011 this = expression 4012 elif self._match(TokenType.ISNULL): 4013 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4014 4015 # Postgres supports ISNULL and NOTNULL for conditions. 
4016 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4017 if self._match(TokenType.NOTNULL): 4018 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4019 this = self.expression(exp.Not, this=this) 4020 4021 if negate: 4022 this = self.expression(exp.Not, this=this) 4023 4024 if self._match(TokenType.IS): 4025 this = self._parse_is(this) 4026 4027 return this 4028 4029 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4030 index = self._index - 1 4031 negate = self._match(TokenType.NOT) 4032 4033 if self._match_text_seq("DISTINCT", "FROM"): 4034 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4035 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4036 4037 expression = self._parse_null() or self._parse_boolean() 4038 if not expression: 4039 self._retreat(index) 4040 return None 4041 4042 this = self.expression(exp.Is, this=this, expression=expression) 4043 return self.expression(exp.Not, this=this) if negate else this 4044 4045 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4046 unnest = self._parse_unnest(with_alias=False) 4047 if unnest: 4048 this = self.expression(exp.In, this=this, unnest=unnest) 4049 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4050 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4051 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4052 4053 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4054 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4055 else: 4056 this = self.expression(exp.In, this=this, expressions=expressions) 4057 4058 if matched_l_paren: 4059 self._match_r_paren(this) 4060 elif not self._match(TokenType.R_BRACKET, expression=this): 4061 self.raise_error("Expecting ]") 4062 else: 4063 this = self.expression(exp.In, this=this, field=self._parse_field()) 4064 4065 return this 4066 4067 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4068 low = self._parse_bitwise() 4069 self._match(TokenType.AND) 4070 high = self._parse_bitwise() 4071 return self.expression(exp.Between, this=this, low=low, high=high) 4072 4073 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4074 if not self._match(TokenType.ESCAPE): 4075 return this 4076 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4077 4078 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4079 index = self._index 4080 4081 if not self._match(TokenType.INTERVAL) and match_interval: 4082 return None 4083 4084 if self._match(TokenType.STRING, advance=False): 4085 this = self._parse_primary() 4086 else: 4087 this = self._parse_term() 4088 4089 if not this or ( 4090 isinstance(this, exp.Column) 4091 and not this.table 4092 and not this.this.quoted 4093 and this.name.upper() == "IS" 4094 ): 4095 self._retreat(index) 4096 return None 4097 4098 unit = self._parse_function() or ( 4099 not self._match(TokenType.ALIAS, advance=False) 4100 and self._parse_var(any_token=True, upper=True) 4101 ) 4102 4103 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4104 # each INTERVAL expression into this canonical form so it's easy to transpile 4105 if this and this.is_number: 4106 this = exp.Literal.string(this.name) 4107 elif this and this.is_string: 4108 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4109 if 
len(parts) == 1: 4110 if unit: 4111 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4112 self._retreat(self._index - 1) 4113 4114 this = exp.Literal.string(parts[0][0]) 4115 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4116 4117 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4118 unit = self.expression( 4119 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4120 ) 4121 4122 interval = self.expression(exp.Interval, this=this, unit=unit) 4123 4124 index = self._index 4125 self._match(TokenType.PLUS) 4126 4127 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4128 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4129 return self.expression( 4130 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4131 ) 4132 4133 self._retreat(index) 4134 return interval 4135 4136 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4137 this = self._parse_term() 4138 4139 while True: 4140 if self._match_set(self.BITWISE): 4141 this = self.expression( 4142 self.BITWISE[self._prev.token_type], 4143 this=this, 4144 expression=self._parse_term(), 4145 ) 4146 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4147 this = self.expression( 4148 exp.DPipe, 4149 this=this, 4150 expression=self._parse_term(), 4151 safe=not self.dialect.STRICT_STRING_CONCAT, 4152 ) 4153 elif self._match(TokenType.DQMARK): 4154 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4155 elif self._match_pair(TokenType.LT, TokenType.LT): 4156 this = self.expression( 4157 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4158 ) 4159 elif self._match_pair(TokenType.GT, TokenType.GT): 4160 this = self.expression( 4161 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4162 ) 4163 else: 4164 break 4165 4166 return this 4167 4168 def _parse_term(self) -> t.Optional[exp.Expression]: 4169 return self._parse_tokens(self._parse_factor, self.TERM) 4170 4171 def _parse_factor(self) -> t.Optional[exp.Expression]: 4172 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4173 this = parse_method() 4174 4175 while self._match_set(self.FACTOR): 4176 this = self.expression( 4177 self.FACTOR[self._prev.token_type], 4178 this=this, 4179 comments=self._prev_comments, 4180 expression=parse_method(), 4181 ) 4182 if isinstance(this, exp.Div): 4183 this.args["typed"] = self.dialect.TYPED_DIVISION 4184 this.args["safe"] = self.dialect.SAFE_DIVISION 4185 4186 return this 4187 4188 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4189 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4190 4191 def _parse_unary(self) -> t.Optional[exp.Expression]: 4192 if self._match_set(self.UNARY_PARSERS): 4193 return self.UNARY_PARSERS[self._prev.token_type](self) 4194 return self._parse_at_time_zone(self._parse_type()) 4195 4196 def _parse_type( 4197 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4198 ) -> t.Optional[exp.Expression]: 4199 interval = parse_interval and self._parse_interval() 4200 if interval: 4201 return interval 4202 4203 index = self._index 4204 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4205 4206 if data_type: 4207 index2 = self._index 4208 this = self._parse_primary() 4209 4210 if isinstance(this, exp.Literal): 4211 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4212 if parser: 4213 return parser(self, this, 
data_type) 4214 4215 return self.expression(exp.Cast, this=this, to=data_type) 4216 4217 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4218 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4219 # 4220 # If the index difference here is greater than 1, that means the parser itself must have 4221 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4222 # 4223 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4224 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4225 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4226 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4227 # 4228 # In these cases, we don't really want to return the converted type, but instead retreat 4229 # and try to parse a Column or Identifier in the section below. 4230 if data_type.expressions and index2 - index > 1: 4231 self._retreat(index2) 4232 return self._parse_column_ops(data_type) 4233 4234 self._retreat(index) 4235 4236 if fallback_to_identifier: 4237 return self._parse_id_var() 4238 4239 this = self._parse_column() 4240 return this and self._parse_column_ops(this) 4241 4242 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4243 this = self._parse_type() 4244 if not this: 4245 return None 4246 4247 if isinstance(this, exp.Column) and not this.table: 4248 this = exp.var(this.name.upper()) 4249 4250 return self.expression( 4251 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4252 ) 4253 4254 def _parse_types( 4255 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4256 ) -> t.Optional[exp.Expression]: 4257 index = self._index 4258 4259 this: t.Optional[exp.Expression] = None 4260 prefix = self._match_text_seq("SYSUDTLIB", ".") 4261 4262 if not self._match_set(self.TYPE_TOKENS): 4263 identifier = allow_identifiers and self._parse_id_var( 4264 any_token=False, tokens=(TokenType.VAR,) 4265 ) 4266 if identifier: 4267 tokens = self.dialect.tokenize(identifier.name) 4268 4269 if len(tokens) != 1: 4270 self.raise_error("Unexpected identifier", self._prev) 4271 4272 if tokens[0].token_type in self.TYPE_TOKENS: 4273 self._prev = tokens[0] 4274 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4275 type_name = identifier.name 4276 4277 while self._match(TokenType.DOT): 4278 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4279 4280 this = exp.DataType.build(type_name, udt=True) 4281 else: 4282 self._retreat(self._index - 1) 4283 return None 4284 else: 4285 return None 4286 4287 type_token = self._prev.token_type 4288 4289 if type_token == TokenType.PSEUDO_TYPE: 4290 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4291 4292 if type_token == TokenType.OBJECT_IDENTIFIER: 4293 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4294 4295 nested = type_token in self.NESTED_TYPE_TOKENS 4296 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4297 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4298 expressions = None 4299 maybe_func = False 4300 4301 if self._match(TokenType.L_PAREN): 4302 if is_struct: 4303 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4304 elif nested: 4305 expressions = self._parse_csv( 4306 lambda: self._parse_types( 4307 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
4308 ) 4309 ) 4310 elif type_token in self.ENUM_TYPE_TOKENS: 4311 expressions = self._parse_csv(self._parse_equality) 4312 elif is_aggregate: 4313 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4314 any_token=False, tokens=(TokenType.VAR,) 4315 ) 4316 if not func_or_ident or not self._match(TokenType.COMMA): 4317 return None 4318 expressions = self._parse_csv( 4319 lambda: self._parse_types( 4320 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4321 ) 4322 ) 4323 expressions.insert(0, func_or_ident) 4324 else: 4325 expressions = self._parse_csv(self._parse_type_size) 4326 4327 if not expressions or not self._match(TokenType.R_PAREN): 4328 self._retreat(index) 4329 return None 4330 4331 maybe_func = True 4332 4333 values: t.Optional[t.List[exp.Expression]] = None 4334 4335 if nested and self._match(TokenType.LT): 4336 if is_struct: 4337 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4338 else: 4339 expressions = self._parse_csv( 4340 lambda: self._parse_types( 4341 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4342 ) 4343 ) 4344 4345 if not self._match(TokenType.GT): 4346 self.raise_error("Expecting >") 4347 4348 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4349 values = self._parse_csv(self._parse_conjunction) 4350 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4351 4352 if type_token in self.TIMESTAMPS: 4353 if self._match_text_seq("WITH", "TIME", "ZONE"): 4354 maybe_func = False 4355 tz_type = ( 4356 exp.DataType.Type.TIMETZ 4357 if type_token in self.TIMES 4358 else exp.DataType.Type.TIMESTAMPTZ 4359 ) 4360 this = exp.DataType(this=tz_type, expressions=expressions) 4361 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4362 maybe_func = False 4363 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4364 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4365 maybe_func = False 4366 elif type_token == TokenType.INTERVAL: 4367 unit = self._parse_var(upper=True) 4368 if unit: 4369 if self._match_text_seq("TO"): 4370 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4371 4372 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4373 else: 4374 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4375 4376 if maybe_func and check_func: 4377 index2 = self._index 4378 peek = self._parse_string() 4379 4380 if not peek: 4381 self._retreat(index) 4382 return None 4383 4384 self._retreat(index2) 4385 4386 if not this: 4387 if self._match_text_seq("UNSIGNED"): 4388 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4389 if not unsigned_type_token: 4390 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4391 4392 type_token = unsigned_type_token or type_token 4393 4394 this = exp.DataType( 4395 this=exp.DataType.Type[type_token.value], 4396 expressions=expressions, 4397 nested=nested, 4398 values=values, 4399 prefix=prefix, 4400 ) 4401 elif expressions: 4402 this.set("expressions", expressions) 4403 4404 index = self._index 4405 4406 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4407 matched_array = self._match(TokenType.ARRAY) 4408 4409 while self._curr: 4410 matched_l_bracket = self._match(TokenType.L_BRACKET) 4411 if not matched_l_bracket and not matched_array: 4412 break 4413 4414 matched_array = False 4415 values = self._parse_csv(self._parse_conjunction) or None 4416 if 
values and not schema: 4417 self._retreat(index) 4418 break 4419 4420 this = exp.DataType( 4421 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4422 ) 4423 self._match(TokenType.R_BRACKET) 4424 4425 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4426 converter = self.TYPE_CONVERTER.get(this.this) 4427 if converter: 4428 this = converter(t.cast(exp.DataType, this)) 4429 4430 return this 4431 4432 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4433 index = self._index 4434 this = ( 4435 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4436 or self._parse_id_var() 4437 ) 4438 self._match(TokenType.COLON) 4439 4440 if ( 4441 type_required 4442 and not isinstance(this, exp.DataType) 4443 and not self._match_set(self.TYPE_TOKENS, advance=False) 4444 ): 4445 self._retreat(index) 4446 return self._parse_types() 4447 4448 return self._parse_column_def(this) 4449 4450 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4451 if not self._match_text_seq("AT", "TIME", "ZONE"): 4452 return this 4453 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4454 4455 def _parse_column(self) -> t.Optional[exp.Expression]: 4456 this = self._parse_column_reference() 4457 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4458 4459 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4460 this = self._parse_field() 4461 if ( 4462 not this 4463 and self._match(TokenType.VALUES, advance=False) 4464 and self.VALUES_FOLLOWED_BY_PAREN 4465 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4466 ): 4467 this = self._parse_id_var() 4468 4469 if isinstance(this, exp.Identifier): 4470 # We bubble up comments from the Identifier to the Column 4471 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4472 4473 return this 4474 4475 def _parse_colon_as_json_extract( 4476 self, this: t.Optional[exp.Expression] 4477 ) -> t.Optional[exp.Expression]: 4478 casts = [] 4479 json_path = [] 4480 4481 while self._match(TokenType.COLON): 4482 start_index = self._index 4483 path = self._parse_column_ops(self._parse_field(any_token=True)) 4484 4485 # The cast :: operator has a lower precedence than the extraction operator :, so 4486 # we rearrange the AST appropriately to avoid casting the JSON path 4487 while isinstance(path, exp.Cast): 4488 casts.append(path.to) 4489 path = path.this 4490 4491 if casts: 4492 dcolon_offset = next( 4493 i 4494 for i, t in enumerate(self._tokens[start_index:]) 4495 if t.token_type == TokenType.DCOLON 4496 ) 4497 end_token = self._tokens[start_index + dcolon_offset - 1] 4498 else: 4499 end_token = self._prev 4500 4501 if path: 4502 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4503 4504 if json_path: 4505 this = self.expression( 4506 exp.JSONExtract, 4507 this=this, 4508 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4509 ) 4510 4511 while casts: 4512 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4513 4514 return this 4515 4516 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4517 this = self._parse_bracket(this) 4518 4519 while self._match_set(self.COLUMN_OPERATORS): 4520 op_token = self._prev.token_type 4521 op = self.COLUMN_OPERATORS.get(op_token) 4522 4523 if op_token == TokenType.DCOLON: 4524 field = self._parse_types() 4525 if not field: 
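# A '::' cast operator must be followed by a parseable type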
4526 self.raise_error("Expected type") 4527 elif op and self._curr: 4528 field = self._parse_column_reference() 4529 else: 4530 field = self._parse_field(any_token=True, anonymous_func=True) 4531 4532 if isinstance(field, exp.Func) and this: 4533 # bigquery allows function calls like x.y.count(...) 4534 # SAFE.SUBSTR(...) 4535 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4536 this = exp.replace_tree( 4537 this, 4538 lambda n: ( 4539 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4540 if n.table 4541 else n.this 4542 ) 4543 if isinstance(n, exp.Column) 4544 else n, 4545 ) 4546 4547 if op: 4548 this = op(self, this, field) 4549 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4550 this = self.expression( 4551 exp.Column, 4552 this=field, 4553 table=this.this, 4554 db=this.args.get("table"), 4555 catalog=this.args.get("db"), 4556 ) 4557 else: 4558 this = self.expression(exp.Dot, this=this, expression=field) 4559 4560 this = self._parse_bracket(this) 4561 4562 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4563 4564 def _parse_primary(self) -> t.Optional[exp.Expression]: 4565 if self._match_set(self.PRIMARY_PARSERS): 4566 token_type = self._prev.token_type 4567 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4568 4569 if token_type == TokenType.STRING: 4570 expressions = [primary] 4571 while self._match(TokenType.STRING): 4572 expressions.append(exp.Literal.string(self._prev.text)) 4573 4574 if len(expressions) > 1: 4575 return self.expression(exp.Concat, expressions=expressions) 4576 4577 return primary 4578 4579 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4580 return exp.Literal.number(f"0.{self._prev.text}") 4581 4582 if self._match(TokenType.L_PAREN): 4583 comments = self._prev_comments 4584 query = self._parse_select() 4585 4586 if query: 4587 expressions = [query] 4588 else: 4589 expressions = self._parse_expressions() 4590 4591 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4592 4593 if not this and self._match(TokenType.R_PAREN, advance=False): 4594 this = self.expression(exp.Tuple) 4595 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4596 this = self._parse_subquery(this=this, parse_alias=False) 4597 elif isinstance(this, exp.Subquery): 4598 this = self._parse_subquery( 4599 this=self._parse_set_operations(this), parse_alias=False 4600 ) 4601 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4602 this = self.expression(exp.Tuple, expressions=expressions) 4603 else: 4604 this = self.expression(exp.Paren, this=this) 4605 4606 if this: 4607 this.add_comments(comments) 4608 4609 self._match_r_paren(expression=this) 4610 return this 4611 4612 return None 4613 4614 def _parse_field( 4615 self, 4616 any_token: bool = False, 4617 tokens: t.Optional[t.Collection[TokenType]] = None, 4618 anonymous_func: bool = False, 4619 ) -> t.Optional[exp.Expression]: 4620 if anonymous_func: 4621 field = ( 4622 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4623 or self._parse_primary() 4624 ) 4625 else: 4626 field = self._parse_primary() or self._parse_function( 4627 anonymous=anonymous_func, any_token=any_token 4628 ) 4629 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4630 4631 def _parse_function( 4632 self, 4633 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4634 anonymous: bool = False, 4635 optional_parens: bool = True, 4636 any_token: bool = False, 4637 ) 
-> t.Optional[exp.Expression]: 4638 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4639 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4640 fn_syntax = False 4641 if ( 4642 self._match(TokenType.L_BRACE, advance=False) 4643 and self._next 4644 and self._next.text.upper() == "FN" 4645 ): 4646 self._advance(2) 4647 fn_syntax = True 4648 4649 func = self._parse_function_call( 4650 functions=functions, 4651 anonymous=anonymous, 4652 optional_parens=optional_parens, 4653 any_token=any_token, 4654 ) 4655 4656 if fn_syntax: 4657 self._match(TokenType.R_BRACE) 4658 4659 return func 4660 4661 def _parse_function_call( 4662 self, 4663 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4664 anonymous: bool = False, 4665 optional_parens: bool = True, 4666 any_token: bool = False, 4667 ) -> t.Optional[exp.Expression]: 4668 if not self._curr: 4669 return None 4670 4671 comments = self._curr.comments 4672 token_type = self._curr.token_type 4673 this = self._curr.text 4674 upper = this.upper() 4675 4676 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4677 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4678 self._advance() 4679 return self._parse_window(parser(self)) 4680 4681 if not self._next or self._next.token_type != TokenType.L_PAREN: 4682 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4683 self._advance() 4684 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4685 4686 return None 4687 4688 if any_token: 4689 if token_type in self.RESERVED_TOKENS: 4690 return None 4691 elif token_type not in self.FUNC_TOKENS: 4692 return None 4693 4694 self._advance(2) 4695 4696 parser = self.FUNCTION_PARSERS.get(upper) 4697 if parser and not anonymous: 4698 this = parser(self) 4699 else: 4700 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4701 4702 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4703 this = self.expression(subquery_predicate, this=self._parse_select()) 4704 self._match_r_paren() 4705 return this 4706 4707 if functions is None: 4708 functions = self.FUNCTIONS 4709 4710 function = functions.get(upper) 4711 4712 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4713 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4714 4715 if alias: 4716 args = self._kv_to_prop_eq(args) 4717 4718 if function and not anonymous: 4719 if "dialect" in function.__code__.co_varnames: 4720 func = function(args, dialect=self.dialect) 4721 else: 4722 func = function(args) 4723 4724 func = self.validate_expression(func, args) 4725 if not self.dialect.NORMALIZE_FUNCTIONS: 4726 func.meta["name"] = this 4727 4728 this = func 4729 else: 4730 if token_type == TokenType.IDENTIFIER: 4731 this = exp.Identifier(this=this, quoted=True) 4732 this = self.expression(exp.Anonymous, this=this, expressions=args) 4733 4734 if isinstance(this, exp.Expression): 4735 this.add_comments(comments) 4736 4737 self._match_r_paren(this) 4738 return self._parse_window(this) 4739 4740 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4741 transformed = [] 4742 4743 for e in expressions: 4744 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4745 if isinstance(e, exp.Alias): 4746 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4747 4748 if not isinstance(e, exp.PropertyEQ): 4749 e = self.expression( 4750 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4751 ) 4752 
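# Keys occasionally parse as columns, so unwrap them back to plain identifiers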
4753 if isinstance(e.this, exp.Column): 4754 e.this.replace(e.this.this) 4755 4756 transformed.append(e) 4757 4758 return transformed 4759 4760 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4761 return self._parse_column_def(self._parse_id_var()) 4762 4763 def _parse_user_defined_function( 4764 self, kind: t.Optional[TokenType] = None 4765 ) -> t.Optional[exp.Expression]: 4766 this = self._parse_id_var() 4767 4768 while self._match(TokenType.DOT): 4769 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4770 4771 if not self._match(TokenType.L_PAREN): 4772 return this 4773 4774 expressions = self._parse_csv(self._parse_function_parameter) 4775 self._match_r_paren() 4776 return self.expression( 4777 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4778 ) 4779 4780 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4781 literal = self._parse_primary() 4782 if literal: 4783 return self.expression(exp.Introducer, this=token.text, expression=literal) 4784 4785 return self.expression(exp.Identifier, this=token.text) 4786 4787 def _parse_session_parameter(self) -> exp.SessionParameter: 4788 kind = None 4789 this = self._parse_id_var() or self._parse_primary() 4790 4791 if this and self._match(TokenType.DOT): 4792 kind = this.name 4793 this = self._parse_var() or self._parse_primary() 4794 4795 return self.expression(exp.SessionParameter, this=this, kind=kind) 4796 4797 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4798 return self._parse_id_var() 4799 4800 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4801 index = self._index 4802 4803 if self._match(TokenType.L_PAREN): 4804 expressions = t.cast( 4805 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4806 ) 4807 4808 if not self._match(TokenType.R_PAREN): 4809 self._retreat(index) 4810 else: 4811 expressions = [self._parse_lambda_arg()] 4812 4813 if self._match_set(self.LAMBDAS): 4814 return self.LAMBDAS[self._prev.token_type](self, expressions) 4815 4816 self._retreat(index) 4817 4818 this: t.Optional[exp.Expression] 4819 4820 if self._match(TokenType.DISTINCT): 4821 this = self.expression( 4822 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4823 ) 4824 else: 4825 this = self._parse_select_or_expression(alias=alias) 4826 4827 return self._parse_limit( 4828 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4829 ) 4830 4831 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4832 index = self._index 4833 if not self._match(TokenType.L_PAREN): 4834 return this 4835 4836 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4837 # expr can be of both types 4838 if self._match_set(self.SELECT_START_TOKENS): 4839 self._retreat(index) 4840 return this 4841 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4842 self._match_r_paren() 4843 return self.expression(exp.Schema, this=this, expressions=args) 4844 4845 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4846 return self._parse_column_def(self._parse_field(any_token=True)) 4847 4848 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4849 # column defs are not really columns, they're identifiers 4850 if isinstance(this, exp.Column): 4851 this = this.this 4852 4853 kind = self._parse_types(schema=True) 4854 4855 if self._match_text_seq("FOR", "ORDINALITY"): 4856 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4857 4858 constraints: t.List[exp.Expression] = [] 4859 4860 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4861 ("ALIAS", "MATERIALIZED") 4862 ): 4863 persisted = self._prev.text.upper() == "MATERIALIZED" 4864 constraints.append( 4865 self.expression( 4866 exp.ComputedColumnConstraint, 4867 this=self._parse_conjunction(), 4868 persisted=persisted or self._match_text_seq("PERSISTED"), 4869 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4870 ) 4871 ) 4872 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4873 self._match(TokenType.ALIAS) 4874 constraints.append( 4875 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4876 ) 4877 4878 while True: 4879 constraint = self._parse_column_constraint() 4880 if not constraint: 4881 break 4882 constraints.append(constraint) 4883 4884 if not kind and not constraints: 4885 return this 4886 4887 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4888 4889 def _parse_auto_increment( 4890 self, 4891 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4892 start = None 4893 increment = None 4894 4895 if self._match(TokenType.L_PAREN, advance=False): 4896 args = self._parse_wrapped_csv(self._parse_bitwise) 4897 start = seq_get(args, 0) 4898 increment = seq_get(args, 1) 4899 elif self._match_text_seq("START"): 4900 start = self._parse_bitwise() 4901 self._match_text_seq("INCREMENT") 4902 increment = self._parse_bitwise() 4903 4904 if start and increment: 4905 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4906 4907 return exp.AutoIncrementColumnConstraint() 4908 4909 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4910 if not self._match_text_seq("REFRESH"): 4911 self._retreat(self._index - 1) 4912 return None 4913 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4914 4915 def _parse_compress(self) -> exp.CompressColumnConstraint: 4916 if self._match(TokenType.L_PAREN, advance=False): 4917 return self.expression( 4918 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4919 ) 4920 4921 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4922 4923 def _parse_generated_as_identity( 4924 self, 4925 ) -> ( 4926 exp.GeneratedAsIdentityColumnConstraint 4927 | exp.ComputedColumnConstraint 4928 | exp.GeneratedAsRowColumnConstraint 4929 ): 4930 if self._match_text_seq("BY", "DEFAULT"): 4931 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4932 this = self.expression( 4933 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4934 ) 4935 else: 4936 self._match_text_seq("ALWAYS") 4937 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4938 4939 self._match(TokenType.ALIAS) 4940 4941 if self._match_text_seq("ROW"): 4942 start = self._match_text_seq("START") 4943 if not start: 4944 self._match(TokenType.END) 4945 hidden = self._match_text_seq("HIDDEN") 4946 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4947 4948 identity = self._match_text_seq("IDENTITY") 4949 4950 if self._match(TokenType.L_PAREN): 4951 if self._match(TokenType.START_WITH): 4952 this.set("start", self._parse_bitwise()) 4953 if self._match_text_seq("INCREMENT", "BY"): 4954 this.set("increment", self._parse_bitwise()) 4955 if self._match_text_seq("MINVALUE"): 4956 this.set("minvalue", self._parse_bitwise()) 4957 if self._match_text_seq("MAXVALUE"): 4958 this.set("maxvalue", self._parse_bitwise()) 4959 4960 if self._match_text_seq("CYCLE"): 4961 this.set("cycle", True) 4962 elif self._match_text_seq("NO", "CYCLE"): 4963 this.set("cycle", False) 4964 4965 if not identity: 4966 this.set("expression", self._parse_range()) 4967 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4968 args = self._parse_csv(self._parse_bitwise) 4969 this.set("start", seq_get(args, 0)) 4970 this.set("increment", seq_get(args, 1)) 4971 4972 self._match_r_paren() 4973 4974 return this 4975 4976 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4977 self._match_text_seq("LENGTH") 4978 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4979 4980 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4981 if self._match_text_seq("NULL"): 4982 return self.expression(exp.NotNullColumnConstraint) 4983 if self._match_text_seq("CASESPECIFIC"): 4984 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4985 if self._match_text_seq("FOR", "REPLICATION"): 4986 return self.expression(exp.NotForReplicationColumnConstraint) 4987 return None 4988 4989 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4990 if self._match(TokenType.CONSTRAINT): 4991 this = self._parse_id_var() 4992 else: 4993 this = None 4994 4995 if self._match_texts(self.CONSTRAINT_PARSERS): 4996 return self.expression( 4997 exp.ColumnConstraint, 4998 this=this, 4999 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5000 ) 5001 5002 return this 5003 5004 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5005 if not self._match(TokenType.CONSTRAINT): 5006 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5007 5008 return self.expression( 5009 exp.Constraint, 5010 this=self._parse_id_var(), 5011 expressions=self._parse_unnamed_constraints(), 5012 ) 5013 5014 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5015 constraints = [] 5016 while True: 5017 constraint = self._parse_unnamed_constraint() or self._parse_function() 5018 if not constraint: 5019 break 5020 constraints.append(constraint) 5021 5022 return constraints 5023 5024 def _parse_unnamed_constraint( 5025 self, constraints: t.Optional[t.Collection[str]] = None 5026 ) -> t.Optional[exp.Expression]: 5027 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5028 constraints or self.CONSTRAINT_PARSERS 5029 ): 5030 return None 5031 5032 constraint = self._prev.text.upper() 5033 if constraint not in self.CONSTRAINT_PARSERS: 5034 
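# The keyword matched one of `constraints` but has no parser registered for it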
self.raise_error(f"No parser found for schema constraint {constraint}.") 5035 5036 return self.CONSTRAINT_PARSERS[constraint](self) 5037 5038 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5039 self._match_text_seq("KEY") 5040 return self.expression( 5041 exp.UniqueColumnConstraint, 5042 this=self._parse_schema(self._parse_id_var(any_token=False)), 5043 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5044 on_conflict=self._parse_on_conflict(), 5045 ) 5046 5047 def _parse_key_constraint_options(self) -> t.List[str]: 5048 options = [] 5049 while True: 5050 if not self._curr: 5051 break 5052 5053 if self._match(TokenType.ON): 5054 action = None 5055 on = self._advance_any() and self._prev.text 5056 5057 if self._match_text_seq("NO", "ACTION"): 5058 action = "NO ACTION" 5059 elif self._match_text_seq("CASCADE"): 5060 action = "CASCADE" 5061 elif self._match_text_seq("RESTRICT"): 5062 action = "RESTRICT" 5063 elif self._match_pair(TokenType.SET, TokenType.NULL): 5064 action = "SET NULL" 5065 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5066 action = "SET DEFAULT" 5067 else: 5068 self.raise_error("Invalid key constraint") 5069 5070 options.append(f"ON {on} {action}") 5071 elif self._match_text_seq("NOT", "ENFORCED"): 5072 options.append("NOT ENFORCED") 5073 elif self._match_text_seq("DEFERRABLE"): 5074 options.append("DEFERRABLE") 5075 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5076 options.append("INITIALLY DEFERRED") 5077 elif self._match_text_seq("NORELY"): 5078 options.append("NORELY") 5079 elif self._match_text_seq("MATCH", "FULL"): 5080 options.append("MATCH FULL") 5081 else: 5082 break 5083 5084 return options 5085 5086 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5087 if match and not self._match(TokenType.REFERENCES): 5088 return None 5089 5090 expressions = None 5091 this = self._parse_table(schema=True) 5092 options = self._parse_key_constraint_options() 5093 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5094 5095 def _parse_foreign_key(self) -> exp.ForeignKey: 5096 expressions = self._parse_wrapped_id_vars() 5097 reference = self._parse_references() 5098 options = {} 5099 5100 while self._match(TokenType.ON): 5101 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5102 self.raise_error("Expected DELETE or UPDATE") 5103 5104 kind = self._prev.text.lower() 5105 5106 if self._match_text_seq("NO", "ACTION"): 5107 action = "NO ACTION" 5108 elif self._match(TokenType.SET): 5109 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5110 action = "SET " + self._prev.text.upper() 5111 else: 5112 self._advance() 5113 action = self._prev.text.upper() 5114 5115 options[kind] = action 5116 5117 return self.expression( 5118 exp.ForeignKey, 5119 expressions=expressions, 5120 reference=reference, 5121 **options, # type: ignore 5122 ) 5123 5124 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5125 return self._parse_field() 5126 5127 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5128 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5129 self._retreat(self._index - 1) 5130 return None 5131 5132 id_vars = self._parse_wrapped_id_vars() 5133 return self.expression( 5134 exp.PeriodForSystemTimeConstraint, 5135 this=seq_get(id_vars, 0), 5136 expression=seq_get(id_vars, 1), 5137 ) 5138 5139 def _parse_primary_key( 5140 self, wrapped_optional: bool = False, in_props: bool = False 5141 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5142 desc = ( 5143 self._match_set((TokenType.ASC, TokenType.DESC)) 5144 and self._prev.token_type == TokenType.DESC 5145 ) 5146 5147 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5148 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5149 5150 expressions = self._parse_wrapped_csv( 5151 self._parse_primary_key_part, optional=wrapped_optional 5152 ) 5153 options = self._parse_key_constraint_options() 5154 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5155 5156 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5157 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5158 5159 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5160 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5161 return this 5162 5163 bracket_kind = self._prev.token_type 5164 expressions = self._parse_csv( 5165 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5166 ) 5167 5168 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5169 self.raise_error("Expected ]") 5170 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5171 self.raise_error("Expected }") 5172 5173 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5174 if bracket_kind == TokenType.L_BRACE: 5175 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5176 elif not this or this.name.upper() == "ARRAY": 5177 this = self.expression(exp.Array, expressions=expressions) 5178 else: 5179 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5180 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5181 5182 self._add_comments(this) 5183 return self._parse_bracket(this) 5184 5185 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5186 if self._match(TokenType.COLON): 5187 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5188 return this 5189 5190 def _parse_case(self) -> t.Optional[exp.Expression]: 5191 ifs = [] 5192 default = None 5193 5194 comments = self._prev_comments 5195 expression = self._parse_conjunction() 5196 5197 while self._match(TokenType.WHEN): 5198 this = self._parse_conjunction() 5199 self._match(TokenType.THEN) 5200 then = self._parse_conjunction() 5201 ifs.append(self.expression(exp.If, this=this, true=then)) 5202 5203 if self._match(TokenType.ELSE): 5204 default = self._parse_conjunction() 5205 5206 if not self._match(TokenType.END): 5207 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5208 default = exp.column("interval") 5209 else: 5210 self.raise_error("Expected END after CASE", self._prev) 5211 5212 return self.expression( 5213 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5214 ) 5215 5216 def _parse_if(self) -> t.Optional[exp.Expression]: 5217 if self._match(TokenType.L_PAREN): 5218 args = self._parse_csv(self._parse_conjunction) 5219 this = self.validate_expression(exp.If.from_arg_list(args), args) 5220 self._match_r_paren() 5221 else: 5222 index = self._index - 1 5223 5224 if self.NO_PAREN_IF_COMMANDS and index == 0: 5225 return self._parse_as_command(self._prev) 5226 5227 condition = self._parse_conjunction() 5228 5229 if not condition: 5230 self._retreat(index) 5231 return None 5232 
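# Parenthesis-free variant: IF <condition> THEN <true> [ELSE <false>] END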
5233 self._match(TokenType.THEN) 5234 true = self._parse_conjunction() 5235 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5236 self._match(TokenType.END) 5237 this = self.expression(exp.If, this=condition, true=true, false=false) 5238 5239 return this 5240 5241 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5242 if not self._match_text_seq("VALUE", "FOR"): 5243 self._retreat(self._index - 1) 5244 return None 5245 5246 return self.expression( 5247 exp.NextValueFor, 5248 this=self._parse_column(), 5249 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5250 ) 5251 5252 def _parse_extract(self) -> exp.Extract: 5253 this = self._parse_function() or self._parse_var() or self._parse_type() 5254 5255 if self._match(TokenType.FROM): 5256 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5257 5258 if not self._match(TokenType.COMMA): 5259 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5260 5261 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5262 5263 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5264 this = self._parse_conjunction() 5265 5266 if not self._match(TokenType.ALIAS): 5267 if self._match(TokenType.COMMA): 5268 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5269 5270 self.raise_error("Expected AS after CAST") 5271 5272 fmt = None 5273 to = self._parse_types() 5274 5275 if self._match(TokenType.FORMAT): 5276 fmt_string = self._parse_string() 5277 fmt = self._parse_at_time_zone(fmt_string) 5278 5279 if not to: 5280 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5281 if to.this in exp.DataType.TEMPORAL_TYPES: 5282 this = self.expression( 5283 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5284 this=this, 5285 format=exp.Literal.string( 5286 format_time( 5287 fmt_string.this if fmt_string else "", 5288 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5289 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5290 ) 5291 ), 5292 ) 5293 5294 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5295 this.set("zone", fmt.args["zone"]) 5296 return this 5297 elif not to: 5298 self.raise_error("Expected TYPE after CAST") 5299 elif isinstance(to, exp.Identifier): 5300 to = exp.DataType.build(to.name, udt=True) 5301 elif to.this == exp.DataType.Type.CHAR: 5302 if self._match(TokenType.CHARACTER_SET): 5303 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5304 5305 return self.expression( 5306 exp.Cast if strict else exp.TryCast, 5307 this=this, 5308 to=to, 5309 format=fmt, 5310 safe=safe, 5311 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5312 ) 5313 5314 def _parse_string_agg(self) -> exp.Expression: 5315 if self._match(TokenType.DISTINCT): 5316 args: t.List[t.Optional[exp.Expression]] = [ 5317 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5318 ] 5319 if self._match(TokenType.COMMA): 5320 args.extend(self._parse_csv(self._parse_conjunction)) 5321 else: 5322 args = self._parse_csv(self._parse_conjunction) # type: ignore 5323 5324 index = self._index 5325 if not self._match(TokenType.R_PAREN) and args: 5326 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5327 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5328 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5329 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5330 5331 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5332 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5333 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5334 if not self._match_text_seq("WITHIN", "GROUP"): 5335 self._retreat(index) 5336 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5337 5338 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5339 order = self._parse_order(this=seq_get(args, 0)) 5340 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5341 5342 def _parse_convert( 5343 self, strict: bool, safe: t.Optional[bool] = None 5344 ) -> t.Optional[exp.Expression]: 5345 this = self._parse_bitwise() 5346 5347 if self._match(TokenType.USING): 5348 to: t.Optional[exp.Expression] = self.expression( 5349 exp.CharacterSet, this=self._parse_var() 5350 ) 5351 elif self._match(TokenType.COMMA): 5352 to = self._parse_types() 5353 else: 5354 to = None 5355 5356 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5357 5358 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5359 """ 5360 There are generally two variants of the DECODE function: 5361 5362 - DECODE(bin, charset) 5363 - DECODE(expression, search, result [, search, result] ... [, default]) 5364 5365 The second variant will always be parsed into a CASE expression. Note that NULL 5366 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5367 instead of relying on pattern matching. 
5368 """ 5369 args = self._parse_csv(self._parse_conjunction) 5370 5371 if len(args) < 3: 5372 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5373 5374 expression, *expressions = args 5375 if not expression: 5376 return None 5377 5378 ifs = [] 5379 for search, result in zip(expressions[::2], expressions[1::2]): 5380 if not search or not result: 5381 return None 5382 5383 if isinstance(search, exp.Literal): 5384 ifs.append( 5385 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5386 ) 5387 elif isinstance(search, exp.Null): 5388 ifs.append( 5389 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5390 ) 5391 else: 5392 cond = exp.or_( 5393 exp.EQ(this=expression.copy(), expression=search), 5394 exp.and_( 5395 exp.Is(this=expression.copy(), expression=exp.Null()), 5396 exp.Is(this=search.copy(), expression=exp.Null()), 5397 copy=False, 5398 ), 5399 copy=False, 5400 ) 5401 ifs.append(exp.If(this=cond, true=result)) 5402 5403 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5404 5405 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5406 self._match_text_seq("KEY") 5407 key = self._parse_column() 5408 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5409 self._match_text_seq("VALUE") 5410 value = self._parse_bitwise() 5411 5412 if not key and not value: 5413 return None 5414 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5415 5416 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5417 if not this or not self._match_text_seq("FORMAT", "JSON"): 5418 return this 5419 5420 return self.expression(exp.FormatJson, this=this) 5421 5422 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5423 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5424 for value in values: 5425 if self._match_text_seq(value, "ON", on): 5426 return f"{value} ON {on}" 5427 5428 return None 5429 5430 @t.overload 5431 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5432 5433 @t.overload 5434 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
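# Editor's note: a minimal, hedged sketch of how the DECODE rewrite in
# _parse_decode above surfaces through sqlglot's public API (the exact output
# string may differ slightly between versions):
#
#     import sqlglot
#
#     # The search/result variant is parsed straight into an exp.Case tree,
#     # so it transpiles to a plain CASE expression:
#     sqlglot.transpile("SELECT DECODE(a, 1, 'one', 'other')", read="oracle", write="duckdb")
#     # ~> ["SELECT CASE WHEN a = 1 THEN 'one' ELSE 'other' END"]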
5435 5436 def _parse_json_object(self, agg=False): 5437 star = self._parse_star() 5438 expressions = ( 5439 [star] 5440 if star 5441 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5442 ) 5443 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5444 5445 unique_keys = None 5446 if self._match_text_seq("WITH", "UNIQUE"): 5447 unique_keys = True 5448 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5449 unique_keys = False 5450 5451 self._match_text_seq("KEYS") 5452 5453 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5454 self._parse_type() 5455 ) 5456 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5457 5458 return self.expression( 5459 exp.JSONObjectAgg if agg else exp.JSONObject, 5460 expressions=expressions, 5461 null_handling=null_handling, 5462 unique_keys=unique_keys, 5463 return_type=return_type, 5464 encoding=encoding, 5465 ) 5466 5467 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5468 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5469 if not self._match_text_seq("NESTED"): 5470 this = self._parse_id_var() 5471 kind = self._parse_types(allow_identifiers=False) 5472 nested = None 5473 else: 5474 this = None 5475 kind = None 5476 nested = True 5477 5478 path = self._match_text_seq("PATH") and self._parse_string() 5479 nested_schema = nested and self._parse_json_schema() 5480 5481 return self.expression( 5482 exp.JSONColumnDef, 5483 this=this, 5484 kind=kind, 5485 path=path, 5486 nested_schema=nested_schema, 5487 ) 5488 5489 def _parse_json_schema(self) -> exp.JSONSchema: 5490 self._match_text_seq("COLUMNS") 5491 return self.expression( 5492 exp.JSONSchema, 5493 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5494 ) 5495 5496 def _parse_json_table(self) -> exp.JSONTable: 5497 this = self._parse_format_json(self._parse_bitwise()) 5498 path = self._match(TokenType.COMMA) and self._parse_string() 5499 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5500 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5501 schema = self._parse_json_schema() 5502 5503 return exp.JSONTable( 5504 this=this, 5505 schema=schema, 5506 path=path, 5507 error_handling=error_handling, 5508 empty_handling=empty_handling, 5509 ) 5510 5511 def _parse_match_against(self) -> exp.MatchAgainst: 5512 expressions = self._parse_csv(self._parse_column) 5513 5514 self._match_text_seq(")", "AGAINST", "(") 5515 5516 this = self._parse_string() 5517 5518 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5519 modifier = "IN NATURAL LANGUAGE MODE" 5520 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5521 modifier = f"{modifier} WITH QUERY EXPANSION" 5522 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5523 modifier = "IN BOOLEAN MODE" 5524 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5525 modifier = "WITH QUERY EXPANSION" 5526 else: 5527 modifier = None 5528 5529 return self.expression( 5530 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5531 ) 5532 5533 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5534 def _parse_open_json(self) -> exp.OpenJSON: 5535 this = self._parse_bitwise() 5536 path = self._match(TokenType.COMMA) and self._parse_string() 5537 5538 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5539 this = self._parse_field(any_token=True) 5540 kind = self._parse_types() 5541 path = 
self._parse_string() 5542 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5543 5544 return self.expression( 5545 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5546 ) 5547 5548 expressions = None 5549 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5550 self._match_l_paren() 5551 expressions = self._parse_csv(_parse_open_json_column_def) 5552 5553 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5554 5555 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5556 args = self._parse_csv(self._parse_bitwise) 5557 5558 if self._match(TokenType.IN): 5559 return self.expression( 5560 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5561 ) 5562 5563 if haystack_first: 5564 haystack = seq_get(args, 0) 5565 needle = seq_get(args, 1) 5566 else: 5567 needle = seq_get(args, 0) 5568 haystack = seq_get(args, 1) 5569 5570 return self.expression( 5571 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5572 ) 5573 5574 def _parse_predict(self) -> exp.Predict: 5575 self._match_text_seq("MODEL") 5576 this = self._parse_table() 5577 5578 self._match(TokenType.COMMA) 5579 self._match_text_seq("TABLE") 5580 5581 return self.expression( 5582 exp.Predict, 5583 this=this, 5584 expression=self._parse_table(), 5585 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5586 ) 5587 5588 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5589 args = self._parse_csv(self._parse_table) 5590 return exp.JoinHint(this=func_name.upper(), expressions=args) 5591 5592 def _parse_substring(self) -> exp.Substring: 5593 # Postgres supports the form: substring(string [from int] [for int]) 5594 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5595 5596 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5597 5598 if self._match(TokenType.FROM): 5599 args.append(self._parse_bitwise()) 5600 if self._match(TokenType.FOR): 5601 if len(args) == 1: 5602 args.append(exp.Literal.number(1)) 5603 args.append(self._parse_bitwise()) 5604 5605 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5606 5607 def _parse_trim(self) -> exp.Trim: 5608 # https://www.w3resource.com/sql/character-functions/trim.php 5609 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5610 5611 position = None 5612 collation = None 5613 expression = None 5614 5615 if self._match_texts(self.TRIM_TYPES): 5616 position = self._prev.text.upper() 5617 5618 this = self._parse_bitwise() 5619 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5620 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5621 expression = self._parse_bitwise() 5622 5623 if invert_order: 5624 this, expression = expression, this 5625 5626 if self._match(TokenType.COLLATE): 5627 collation = self._parse_bitwise() 5628 5629 return self.expression( 5630 exp.Trim, this=this, position=position, expression=expression, collation=collation 5631 ) 5632 5633 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5634 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5635 5636 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5637 return self._parse_window(self._parse_id_var(), alias=True) 5638 5639 def _parse_respect_or_ignore_nulls( 5640 self, this: t.Optional[exp.Expression] 5641 ) -> t.Optional[exp.Expression]: 5642 if self._match_text_seq("IGNORE", "NULLS"): 
5643 return self.expression(exp.IgnoreNulls, this=this) 5644 if self._match_text_seq("RESPECT", "NULLS"): 5645 return self.expression(exp.RespectNulls, this=this) 5646 return this 5647 5648 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5649 if self._match(TokenType.HAVING): 5650 self._match_texts(("MAX", "MIN")) 5651 max = self._prev.text.upper() != "MIN" 5652 return self.expression( 5653 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5654 ) 5655 5656 return this 5657 5658 def _parse_window( 5659 self, this: t.Optional[exp.Expression], alias: bool = False 5660 ) -> t.Optional[exp.Expression]: 5661 func = this 5662 comments = func.comments if isinstance(func, exp.Expression) else None 5663 5664 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5665 self._match(TokenType.WHERE) 5666 this = self.expression( 5667 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5668 ) 5669 self._match_r_paren() 5670 5671 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5672 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5673 if self._match_text_seq("WITHIN", "GROUP"): 5674 order = self._parse_wrapped(self._parse_order) 5675 this = self.expression(exp.WithinGroup, this=this, expression=order) 5676 5677 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5678 # Some dialects choose to implement and some do not. 5679 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5680 5681 # There is some code above in _parse_lambda that handles 5682 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5683 5684 # The below changes handle 5685 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5686 5687 # Oracle allows both formats 5688 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5689 # and Snowflake chose to do the same for familiarity 5690 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5691 if isinstance(this, exp.AggFunc): 5692 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5693 5694 if ignore_respect and ignore_respect is not this: 5695 ignore_respect.replace(ignore_respect.this) 5696 this = self.expression(ignore_respect.__class__, this=this) 5697 5698 this = self._parse_respect_or_ignore_nulls(this) 5699 5700 # bigquery select from window x AS (partition by ...) 
5701 if alias: 5702 over = None 5703 self._match(TokenType.ALIAS) 5704 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5705 return this 5706 else: 5707 over = self._prev.text.upper() 5708 5709 if comments and isinstance(func, exp.Expression): 5710 func.pop_comments() 5711 5712 if not self._match(TokenType.L_PAREN): 5713 return self.expression( 5714 exp.Window, 5715 comments=comments, 5716 this=this, 5717 alias=self._parse_id_var(False), 5718 over=over, 5719 ) 5720 5721 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5722 5723 first = self._match(TokenType.FIRST) 5724 if self._match_text_seq("LAST"): 5725 first = False 5726 5727 partition, order = self._parse_partition_and_order() 5728 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5729 5730 if kind: 5731 self._match(TokenType.BETWEEN) 5732 start = self._parse_window_spec() 5733 self._match(TokenType.AND) 5734 end = self._parse_window_spec() 5735 5736 spec = self.expression( 5737 exp.WindowSpec, 5738 kind=kind, 5739 start=start["value"], 5740 start_side=start["side"], 5741 end=end["value"], 5742 end_side=end["side"], 5743 ) 5744 else: 5745 spec = None 5746 5747 self._match_r_paren() 5748 5749 window = self.expression( 5750 exp.Window, 5751 comments=comments, 5752 this=this, 5753 partition_by=partition, 5754 order=order, 5755 spec=spec, 5756 alias=window_alias, 5757 over=over, 5758 first=first, 5759 ) 5760 5761 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5762 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5763 return self._parse_window(window, alias=alias) 5764 5765 return window 5766 5767 def _parse_partition_and_order( 5768 self, 5769 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5770 return self._parse_partition_by(), self._parse_order() 5771 5772 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5773 self._match(TokenType.BETWEEN) 5774 5775 return { 5776 "value": ( 5777 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5778 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5779 or self._parse_bitwise() 5780 ), 5781 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5782 } 5783 5784 def _parse_alias( 5785 self, this: t.Optional[exp.Expression], explicit: bool = False 5786 ) -> t.Optional[exp.Expression]: 5787 any_token = self._match(TokenType.ALIAS) 5788 comments = self._prev_comments or [] 5789 5790 if explicit and not any_token: 5791 return this 5792 5793 if self._match(TokenType.L_PAREN): 5794 aliases = self.expression( 5795 exp.Aliases, 5796 comments=comments, 5797 this=this, 5798 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5799 ) 5800 self._match_r_paren(aliases) 5801 return aliases 5802 5803 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5804 self.STRING_ALIASES and self._parse_string_as_identifier() 5805 ) 5806 5807 if alias: 5808 comments.extend(alias.pop_comments()) 5809 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5810 column = this.this 5811 5812 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5813 if not this.comments and column and column.comments: 5814 this.comments = column.pop_comments() 5815 5816 return this 5817 5818 def _parse_id_var( 5819 self, 5820 any_token: bool = True, 5821 tokens: t.Optional[t.Collection[TokenType]] = None, 5822 ) -> t.Optional[exp.Expression]: 5823 expression = self._parse_identifier() 5824 if 
not expression and ( 5825 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5826 ): 5827 quoted = self._prev.token_type == TokenType.STRING 5828 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5829 5830 return expression 5831 5832 def _parse_string(self) -> t.Optional[exp.Expression]: 5833 if self._match_set(self.STRING_PARSERS): 5834 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5835 return self._parse_placeholder() 5836 5837 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5838 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5839 5840 def _parse_number(self) -> t.Optional[exp.Expression]: 5841 if self._match_set(self.NUMERIC_PARSERS): 5842 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5843 return self._parse_placeholder() 5844 5845 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5846 if self._match(TokenType.IDENTIFIER): 5847 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5848 return self._parse_placeholder() 5849 5850 def _parse_var( 5851 self, 5852 any_token: bool = False, 5853 tokens: t.Optional[t.Collection[TokenType]] = None, 5854 upper: bool = False, 5855 ) -> t.Optional[exp.Expression]: 5856 if ( 5857 (any_token and self._advance_any()) 5858 or self._match(TokenType.VAR) 5859 or (self._match_set(tokens) if tokens else False) 5860 ): 5861 return self.expression( 5862 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5863 ) 5864 return self._parse_placeholder() 5865 5866 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5867 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5868 self._advance() 5869 return self._prev 5870 return None 5871 5872 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5873 return self._parse_var() or self._parse_string() 5874 5875 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5876 return self._parse_primary() or self._parse_var(any_token=True) 5877 5878 def _parse_null(self) -> t.Optional[exp.Expression]: 5879 if self._match_set(self.NULL_TOKENS): 5880 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5881 return self._parse_placeholder() 5882 5883 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5884 if self._match(TokenType.TRUE): 5885 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5886 if self._match(TokenType.FALSE): 5887 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5888 return self._parse_placeholder() 5889 5890 def _parse_star(self) -> t.Optional[exp.Expression]: 5891 if self._match(TokenType.STAR): 5892 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5893 return self._parse_placeholder() 5894 5895 def _parse_parameter(self) -> exp.Parameter: 5896 this = self._parse_identifier() or self._parse_primary_or_var() 5897 return self.expression(exp.Parameter, this=this) 5898 5899 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5900 if self._match_set(self.PLACEHOLDER_PARSERS): 5901 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5902 if placeholder: 5903 return placeholder 5904 self._advance(-1) 5905 return None 5906 5907 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5908 if not self._match_texts(keywords): 5909 return None 5910 if self._match(TokenType.L_PAREN, advance=False): 5911 return 
self._parse_wrapped_csv(self._parse_expression) 5912 5913 expression = self._parse_expression() 5914 return [expression] if expression else None 5915 5916 def _parse_csv( 5917 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5918 ) -> t.List[exp.Expression]: 5919 parse_result = parse_method() 5920 items = [parse_result] if parse_result is not None else [] 5921 5922 while self._match(sep): 5923 self._add_comments(parse_result) 5924 parse_result = parse_method() 5925 if parse_result is not None: 5926 items.append(parse_result) 5927 5928 return items 5929 5930 def _parse_tokens( 5931 self, parse_method: t.Callable, expressions: t.Dict 5932 ) -> t.Optional[exp.Expression]: 5933 this = parse_method() 5934 5935 while self._match_set(expressions): 5936 this = self.expression( 5937 expressions[self._prev.token_type], 5938 this=this, 5939 comments=self._prev_comments, 5940 expression=parse_method(), 5941 ) 5942 5943 return this 5944 5945 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5946 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5947 5948 def _parse_wrapped_csv( 5949 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5950 ) -> t.List[exp.Expression]: 5951 return self._parse_wrapped( 5952 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5953 ) 5954 5955 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5956 wrapped = self._match(TokenType.L_PAREN) 5957 if not wrapped and not optional: 5958 self.raise_error("Expecting (") 5959 parse_result = parse_method() 5960 if wrapped: 5961 self._match_r_paren() 5962 return parse_result 5963 5964 def _parse_expressions(self) -> t.List[exp.Expression]: 5965 return self._parse_csv(self._parse_expression) 5966 5967 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5968 return self._parse_select() or self._parse_set_operations( 5969 self._parse_expression() if alias else self._parse_conjunction() 5970 ) 5971 5972 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5973 return self._parse_query_modifiers( 5974 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5975 ) 5976 5977 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5978 this = None 5979 if self._match_texts(self.TRANSACTION_KIND): 5980 this = self._prev.text 5981 5982 self._match_texts(("TRANSACTION", "WORK")) 5983 5984 modes = [] 5985 while True: 5986 mode = [] 5987 while self._match(TokenType.VAR): 5988 mode.append(self._prev.text) 5989 5990 if mode: 5991 modes.append(" ".join(mode)) 5992 if not self._match(TokenType.COMMA): 5993 break 5994 5995 return self.expression(exp.Transaction, this=this, modes=modes) 5996 5997 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5998 chain = None 5999 savepoint = None 6000 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6001 6002 self._match_texts(("TRANSACTION", "WORK")) 6003 6004 if self._match_text_seq("TO"): 6005 self._match_text_seq("SAVEPOINT") 6006 savepoint = self._parse_id_var() 6007 6008 if self._match(TokenType.AND): 6009 chain = not self._match_text_seq("NO") 6010 self._match_text_seq("CHAIN") 6011 6012 if is_rollback: 6013 return self.expression(exp.Rollback, savepoint=savepoint) 6014 6015 return self.expression(exp.Commit, chain=chain) 6016 6017 def _parse_refresh(self) -> exp.Refresh: 6018 self._match(TokenType.TABLE) 6019 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6020 6021 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6022 if not self._match_text_seq("ADD"): 6023 return None 6024 6025 self._match(TokenType.COLUMN) 6026 exists_column = self._parse_exists(not_=True) 6027 expression = self._parse_field_def() 6028 6029 if expression: 6030 expression.set("exists", exists_column) 6031 6032 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6033 if self._match_texts(("FIRST", "AFTER")): 6034 position = self._prev.text 6035 column_position = self.expression( 6036 exp.ColumnPosition, this=self._parse_column(), position=position 6037 ) 6038 expression.set("position", column_position) 6039 6040 return expression 6041 6042 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6043 drop = self._match(TokenType.DROP) and self._parse_drop() 6044 if drop and not isinstance(drop, exp.Command): 6045 drop.set("kind", drop.args.get("kind", "COLUMN")) 6046 return drop 6047 6048 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6049 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6050 return self.expression( 6051 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6052 ) 6053 6054 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6055 index = self._index - 1 6056 6057 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6058 return self._parse_csv( 6059 lambda: self.expression( 6060 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6061 ) 6062 ) 6063 6064 self._retreat(index) 6065 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6066 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6067 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6068 6069 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6070 if self._match_texts(self.ALTER_ALTER_PARSERS): 6071 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6072 6073 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6074 # keyword after ALTER we default to parsing this statement 6075 self._match(TokenType.COLUMN) 6076 column = self._parse_field(any_token=True) 6077 6078 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6079 return self.expression(exp.AlterColumn, this=column, drop=True) 6080 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6081 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6082 if self._match(TokenType.COMMENT): 6083 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6084 if self._match_text_seq("DROP", "NOT", "NULL"): 6085 return self.expression( 6086 exp.AlterColumn, 6087 this=column, 6088 drop=True, 6089 allow_null=True, 6090 ) 6091 if self._match_text_seq("SET", "NOT", "NULL"): 6092 return self.expression( 6093 exp.AlterColumn, 6094 this=column, 6095 allow_null=False, 6096 ) 6097 self._match_text_seq("SET", "DATA") 6098 self._match_text_seq("TYPE") 6099 return self.expression( 6100 exp.AlterColumn, 6101 this=column, 6102 dtype=self._parse_types(), 6103 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6104 using=self._match(TokenType.USING) and self._parse_conjunction(), 6105 ) 6106 6107 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6108 if self._match_texts(("ALL", "EVEN", "AUTO")): 6109 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6110 6111 self._match_text_seq("KEY", "DISTKEY") 6112 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6113 6114 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6115 if compound: 6116 self._match_text_seq("SORTKEY") 6117 6118 if self._match(TokenType.L_PAREN, advance=False): 6119 return self.expression( 6120 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6121 ) 6122 6123 self._match_texts(("AUTO", "NONE")) 6124 return self.expression( 6125 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6126 ) 6127 6128 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6129 index = self._index - 1 6130 6131 partition_exists = self._parse_exists() 6132 if self._match(TokenType.PARTITION, advance=False): 6133 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6134 6135 self._retreat(index) 6136 return self._parse_csv(self._parse_drop_column) 6137 6138 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6139 if self._match(TokenType.COLUMN): 6140 exists = self._parse_exists() 6141 old_column = self._parse_column() 6142 to = self._match_text_seq("TO") 6143 new_column = self._parse_column() 6144 6145 if old_column is None or to is None or new_column is None: 6146 return None 6147 6148 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6149 6150 self._match_text_seq("TO") 6151 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6152 6153 def _parse_alter_table_set(self) -> exp.AlterSet: 6154 alter_set = self.expression(exp.AlterSet) 6155 6156 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6157 "TABLE", "PROPERTIES" 6158 ): 6159 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6160 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6161 alter_set.set("expressions", [self._parse_conjunction()]) 6162 elif self._match_texts(("LOGGED", "UNLOGGED")): 6163 alter_set.set("option", exp.var(self._prev.text.upper())) 6164 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6165 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6166 elif self._match_text_seq("LOCATION"): 6167 alter_set.set("location", self._parse_field()) 6168 elif self._match_text_seq("ACCESS", "METHOD"): 6169 alter_set.set("access_method", self._parse_field()) 6170 elif self._match_text_seq("TABLESPACE"): 6171 alter_set.set("tablespace", self._parse_field()) 6172 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6173 alter_set.set("file_format", [self._parse_field()]) 6174 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6175 alter_set.set("file_format", self._parse_wrapped_options()) 6176 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6177 alter_set.set("copy_options", self._parse_wrapped_options()) 6178 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6179 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6180 else: 6181 if self._match_text_seq("SERDE"): 6182 alter_set.set("serde", self._parse_field()) 6183 6184 alter_set.set("expressions", [self._parse_properties()]) 6185 6186 return alter_set 6187 6188 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6189 start = self._prev 6190 6191 if not self._match(TokenType.TABLE): 6192 return 
self._parse_as_command(start) 6193 6194 exists = self._parse_exists() 6195 only = self._match_text_seq("ONLY") 6196 this = self._parse_table(schema=True) 6197 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6198 6199 if self._next: 6200 self._advance() 6201 6202 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6203 if parser: 6204 actions = ensure_list(parser(self)) 6205 options = self._parse_csv(self._parse_property) 6206 6207 if not self._curr and actions: 6208 return self.expression( 6209 exp.AlterTable, 6210 this=this, 6211 exists=exists, 6212 actions=actions, 6213 only=only, 6214 options=options, 6215 cluster=cluster, 6216 ) 6217 6218 return self._parse_as_command(start) 6219 6220 def _parse_merge(self) -> exp.Merge: 6221 self._match(TokenType.INTO) 6222 target = self._parse_table() 6223 6224 if target and self._match(TokenType.ALIAS, advance=False): 6225 target.set("alias", self._parse_table_alias()) 6226 6227 self._match(TokenType.USING) 6228 using = self._parse_table() 6229 6230 self._match(TokenType.ON) 6231 on = self._parse_conjunction() 6232 6233 return self.expression( 6234 exp.Merge, 6235 this=target, 6236 using=using, 6237 on=on, 6238 expressions=self._parse_when_matched(), 6239 ) 6240 6241 def _parse_when_matched(self) -> t.List[exp.When]: 6242 whens = [] 6243 6244 while self._match(TokenType.WHEN): 6245 matched = not self._match(TokenType.NOT) 6246 self._match_text_seq("MATCHED") 6247 source = ( 6248 False 6249 if self._match_text_seq("BY", "TARGET") 6250 else self._match_text_seq("BY", "SOURCE") 6251 ) 6252 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6253 6254 self._match(TokenType.THEN) 6255 6256 if self._match(TokenType.INSERT): 6257 _this = self._parse_star() 6258 if _this: 6259 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6260 else: 6261 then = self.expression( 6262 exp.Insert, 6263 this=self._parse_value(), 6264 expression=self._match_text_seq("VALUES") and self._parse_value(), 6265 ) 6266 elif self._match(TokenType.UPDATE): 6267 expressions = self._parse_star() 6268 if expressions: 6269 then = self.expression(exp.Update, expressions=expressions) 6270 else: 6271 then = self.expression( 6272 exp.Update, 6273 expressions=self._match(TokenType.SET) 6274 and self._parse_csv(self._parse_equality), 6275 ) 6276 elif self._match(TokenType.DELETE): 6277 then = self.expression(exp.Var, this=self._prev.text) 6278 else: 6279 then = None 6280 6281 whens.append( 6282 self.expression( 6283 exp.When, 6284 matched=matched, 6285 source=source, 6286 condition=condition, 6287 then=then, 6288 ) 6289 ) 6290 return whens 6291 6292 def _parse_show(self) -> t.Optional[exp.Expression]: 6293 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6294 if parser: 6295 return parser(self) 6296 return self._parse_as_command(self._prev) 6297 6298 def _parse_set_item_assignment( 6299 self, kind: t.Optional[str] = None 6300 ) -> t.Optional[exp.Expression]: 6301 index = self._index 6302 6303 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6304 return self._parse_set_transaction(global_=kind == "GLOBAL") 6305 6306 left = self._parse_primary() or self._parse_column() 6307 assignment_delimiter = self._match_texts(("=", "TO")) 6308 6309 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6310 self._retreat(index) 6311 return None 6312 6313 right = self._parse_statement() or self._parse_id_var() 6314 if isinstance(right, 
(exp.Column, exp.Identifier)): 6315 right = exp.var(right.name) 6316 6317 this = self.expression(exp.EQ, this=left, expression=right) 6318 return self.expression(exp.SetItem, this=this, kind=kind) 6319 6320 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6321 self._match_text_seq("TRANSACTION") 6322 characteristics = self._parse_csv( 6323 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6324 ) 6325 return self.expression( 6326 exp.SetItem, 6327 expressions=characteristics, 6328 kind="TRANSACTION", 6329 **{"global": global_}, # type: ignore 6330 ) 6331 6332 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6333 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6334 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6335 6336 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6337 index = self._index 6338 set_ = self.expression( 6339 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6340 ) 6341 6342 if self._curr: 6343 self._retreat(index) 6344 return self._parse_as_command(self._prev) 6345 6346 return set_ 6347 6348 def _parse_var_from_options( 6349 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6350 ) -> t.Optional[exp.Var]: 6351 start = self._curr 6352 if not start: 6353 return None 6354 6355 option = start.text.upper() 6356 continuations = options.get(option) 6357 6358 index = self._index 6359 self._advance() 6360 for keywords in continuations or []: 6361 if isinstance(keywords, str): 6362 keywords = (keywords,) 6363 6364 if self._match_text_seq(*keywords): 6365 option = f"{option} {' '.join(keywords)}" 6366 break 6367 else: 6368 if continuations or continuations is None: 6369 if raise_unmatched: 6370 self.raise_error(f"Unknown option {option}") 6371 6372 self._retreat(index) 6373 return None 6374 6375 return exp.var(option) 6376 6377 def _parse_as_command(self, start: Token) -> exp.Command: 6378 while self._curr: 6379 self._advance() 6380 text = self._find_sql(start, self._prev) 6381 size = len(start.text) 6382 self._warn_unsupported() 6383 return exp.Command(this=text[:size], expression=text[size:]) 6384 6385 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6386 settings = [] 6387 6388 self._match_l_paren() 6389 kind = self._parse_id_var() 6390 6391 if self._match(TokenType.L_PAREN): 6392 while True: 6393 key = self._parse_id_var() 6394 value = self._parse_primary() 6395 6396 if not key and value is None: 6397 break 6398 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6399 self._match(TokenType.R_PAREN) 6400 6401 self._match_r_paren() 6402 6403 return self.expression( 6404 exp.DictProperty, 6405 this=this, 6406 kind=kind.this if kind else None, 6407 settings=settings, 6408 ) 6409 6410 def _parse_dict_range(self, this: str) -> exp.DictRange: 6411 self._match_l_paren() 6412 has_min = self._match_text_seq("MIN") 6413 if has_min: 6414 min = self._parse_var() or self._parse_primary() 6415 self._match_text_seq("MAX") 6416 max = self._parse_var() or self._parse_primary() 6417 else: 6418 max = self._parse_var() or self._parse_primary() 6419 min = exp.Literal.number(0) 6420 self._match_r_paren() 6421 return self.expression(exp.DictRange, this=this, min=min, max=max) 6422 6423 def _parse_comprehension( 6424 self, this: t.Optional[exp.Expression] 6425 ) -> t.Optional[exp.Comprehension]: 6426 index = self._index 6427 expression = self._parse_column() 6428 if not 
self._match(TokenType.IN): 6429 self._retreat(index - 1) 6430 return None 6431 iterator = self._parse_column() 6432 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6433 return self.expression( 6434 exp.Comprehension, 6435 this=this, 6436 expression=expression, 6437 iterator=iterator, 6438 condition=condition, 6439 ) 6440 6441 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6442 if self._match(TokenType.HEREDOC_STRING): 6443 return self.expression(exp.Heredoc, this=self._prev.text) 6444 6445 if not self._match_text_seq("$"): 6446 return None 6447 6448 tags = ["$"] 6449 tag_text = None 6450 6451 if self._is_connected(): 6452 self._advance() 6453 tags.append(self._prev.text.upper()) 6454 else: 6455 self.raise_error("No closing $ found") 6456 6457 if tags[-1] != "$": 6458 if self._is_connected() and self._match_text_seq("$"): 6459 tag_text = tags[-1] 6460 tags.append("$") 6461 else: 6462 self.raise_error("No closing $ found") 6463 6464 heredoc_start = self._curr 6465 6466 while self._curr: 6467 if self._match_text_seq(*tags, advance=False): 6468 this = self._find_sql(heredoc_start, self._prev) 6469 self._advance(len(tags)) 6470 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6471 6472 self._advance() 6473 6474 self.raise_error(f"No closing {''.join(tags)} found") 6475 return None 6476 6477 def _find_parser( 6478 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6479 ) -> t.Optional[t.Callable]: 6480 if not self._curr: 6481 return None 6482 6483 index = self._index 6484 this = [] 6485 while True: 6486 # The current token might be multiple words 6487 curr = self._curr.text.upper() 6488 key = curr.split(" ") 6489 this.append(curr) 6490 6491 self._advance() 6492 result, trie = in_trie(trie, key) 6493 if result == TrieResult.FAILED: 6494 break 6495 6496 if result == TrieResult.EXISTS: 6497 subparser = parsers[" ".join(this)] 6498 return subparser 6499 6500 self._retreat(index) 6501 return None 6502 6503 def _match(self, token_type, advance=True, expression=None): 6504 if not self._curr: 6505 return None 6506 6507 if self._curr.token_type == token_type: 6508 if advance: 6509 self._advance() 6510 self._add_comments(expression) 6511 return True 6512 6513 return None 6514 6515 def _match_set(self, types, advance=True): 6516 if not self._curr: 6517 return None 6518 6519 if self._curr.token_type in types: 6520 if advance: 6521 self._advance() 6522 return True 6523 6524 return None 6525 6526 def _match_pair(self, token_type_a, token_type_b, advance=True): 6527 if not self._curr or not self._next: 6528 return None 6529 6530 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6531 if advance: 6532 self._advance(2) 6533 return True 6534 6535 return None 6536 6537 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6538 if not self._match(TokenType.L_PAREN, expression=expression): 6539 self.raise_error("Expecting (") 6540 6541 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6542 if not self._match(TokenType.R_PAREN, expression=expression): 6543 self.raise_error("Expecting )") 6544 6545 def _match_texts(self, texts, advance=True): 6546 if self._curr and self._curr.text.upper() in texts: 6547 if advance: 6548 self._advance() 6549 return True 6550 return None 6551 6552 def _match_text_seq(self, *texts, advance=True): 6553 index = self._index 6554 for text in texts: 6555 if self._curr and self._curr.text.upper() == text: 6556 self._advance() 6557 else: 6558 
self._retreat(index) 6559 return None 6560 6561 if not advance: 6562 self._retreat(index) 6563 6564 return True 6565 6566 def _replace_lambda( 6567 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6568 ) -> t.Optional[exp.Expression]: 6569 if not node: 6570 return node 6571 6572 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6573 6574 for column in node.find_all(exp.Column): 6575 typ = lambda_types.get(column.parts[0].name) 6576 if typ is not None: 6577 dot_or_id = column.to_dot() if column.table else column.this 6578 6579 if typ: 6580 dot_or_id = self.expression( 6581 exp.Cast, 6582 this=dot_or_id, 6583 to=typ, 6584 ) 6585 6586 parent = column.parent 6587 6588 while isinstance(parent, exp.Dot): 6589 if not isinstance(parent.parent, exp.Dot): 6590 parent.replace(dot_or_id) 6591 break 6592 parent = parent.parent 6593 else: 6594 if column is node: 6595 node = dot_or_id 6596 else: 6597 column.replace(dot_or_id) 6598 return node 6599 6600 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6601 start = self._prev 6602 6603 # Not to be confused with TRUNCATE(number, decimals) function call 6604 if self._match(TokenType.L_PAREN): 6605 self._retreat(self._index - 2) 6606 return self._parse_function() 6607 6608 # Clickhouse supports TRUNCATE DATABASE as well 6609 is_database = self._match(TokenType.DATABASE) 6610 6611 self._match(TokenType.TABLE) 6612 6613 exists = self._parse_exists(not_=False) 6614 6615 expressions = self._parse_csv( 6616 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6617 ) 6618 6619 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6620 6621 if self._match_text_seq("RESTART", "IDENTITY"): 6622 identity = "RESTART" 6623 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6624 identity = "CONTINUE" 6625 else: 6626 identity = None 6627 6628 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6629 option = self._prev.text 6630 else: 6631 option = None 6632 6633 partition = self._parse_partition() 6634 6635 # Fallback case 6636 if self._curr: 6637 return self._parse_as_command(start) 6638 6639 return self.expression( 6640 exp.TruncateTable, 6641 expressions=expressions, 6642 is_database=is_database, 6643 exists=exists, 6644 cluster=cluster, 6645 identity=identity, 6646 option=option, 6647 partition=partition, 6648 ) 6649 6650 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6651 this = self._parse_ordered(self._parse_opclass) 6652 6653 if not self._match(TokenType.WITH): 6654 return this 6655 6656 op = self._parse_var(any_token=True) 6657 6658 return self.expression(exp.WithOperator, this=this, op=op) 6659 6660 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6661 self._match(TokenType.EQ) 6662 self._match(TokenType.L_PAREN) 6663 6664 opts: t.List[t.Optional[exp.Expression]] = [] 6665 while self._curr and not self._match(TokenType.R_PAREN): 6666 if self._match_text_seq("FORMAT_NAME", "="): 6667 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6668 # so we parse it separately to use _parse_field() 6669 prop = self.expression( 6670 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6671 ) 6672 opts.append(prop) 6673 else: 6674 opts.append(self._parse_property()) 6675 6676 self._match(TokenType.COMMA) 6677 6678 return opts 6679 6680 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6681 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6682 6683 options = [] 6684 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6685 option = self._parse_var(any_token=True) 6686 prev = self._prev.text.upper() 6687 6688 # Different dialects might separate options and values by white space, "=" and "AS" 6689 self._match(TokenType.EQ) 6690 self._match(TokenType.ALIAS) 6691 6692 param = self.expression(exp.CopyParameter, this=option) 6693 6694 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6695 TokenType.L_PAREN, advance=False 6696 ): 6697 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6698 param.set("expressions", self._parse_wrapped_options()) 6699 elif prev == "FILE_FORMAT": 6700 # T-SQL's external file format case 6701 param.set("expression", self._parse_field()) 6702 else: 6703 param.set("expression", self._parse_unquoted_field()) 6704 6705 options.append(param) 6706 self._match(sep) 6707 6708 return options 6709 6710 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6711 expr = self.expression(exp.Credentials) 6712 6713 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6714 expr.set("storage", self._parse_field()) 6715 if self._match_text_seq("CREDENTIALS"): 6716 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6717 creds = ( 6718 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6719 ) 6720 expr.set("credentials", creds) 6721 if self._match_text_seq("ENCRYPTION"): 6722 expr.set("encryption", self._parse_wrapped_options()) 6723 if self._match_text_seq("IAM_ROLE"): 6724 expr.set("iam_role", self._parse_field()) 6725 if self._match_text_seq("REGION"): 6726 expr.set("region", self._parse_field()) 6727 6728 return expr 6729 6730 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6731 return self._parse_field() 6732 6733 def _parse_copy(self) -> exp.Copy | exp.Command: 6734 start = self._prev 6735 6736 self._match(TokenType.INTO) 6737 6738 this = ( 6739 self._parse_select(nested=True, parse_subquery_alias=False) 6740 if self._match(TokenType.L_PAREN, advance=False) 6741 else self._parse_table(schema=True) 6742 ) 6743 6744 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6745 6746 files = self._parse_csv(self._parse_file_location) 6747 credentials = self._parse_credentials() 6748 6749 self._match_text_seq("WITH") 6750 6751 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6752 6753 # Fallback case 6754 if self._curr: 6755 return self._parse_as_command(start) 6756 6757 return self.expression( 6758 exp.Copy, 6759 this=this, 6760 kind=kind, 6761 credentials=credentials, 6762 files=files, 6763 params=params, 6764 )
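The listing above closes out the Parser class body. To make the DECODE builder at the start of this section concrete: it rewrites a DECODE(...) call into an equivalent CASE expression, adding a NULL-safe comparison branch when a search argument is not a literal. A minimal round trip, assuming Oracle-flavored input (exact output formatting may vary between sqlglot versions):

import sqlglot

# DECODE is parsed into a CASE expression by the builder shown above.
print(sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')", read="oracle").sql())
# e.g. SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END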
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
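A minimal construction sketch, assuming the default dialect (the top-level sqlglot.parse() and parse_one() helpers normally wrap these steps):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
# Collect up to five errors and raise them together, instead of failing on the first.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(expressions[0].sql())  # SELECT a FROM t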
1210 def __init__( 1211 self, 1212 error_level: t.Optional[ErrorLevel] = None, 1213 error_message_context: int = 100, 1214 max_errors: int = 3, 1215 dialect: DialectType = None, 1216 ): 1217 from sqlglot.dialects import Dialect 1218 1219 self.error_level = error_level or ErrorLevel.IMMEDIATE 1220 self.error_message_context = error_message_context 1221 self.max_errors = max_errors 1222 self.dialect = Dialect.get_or_raise(dialect) 1223 self.reset()
1235 def parse( 1236 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1237 ) -> t.List[t.Optional[exp.Expression]]: 1238 """ 1239 Parses a list of tokens and returns a list of syntax trees, one tree 1240 per parsed SQL statement. 1241 1242 Args: 1243 raw_tokens: The list of tokens. 1244 sql: The original SQL string, used to produce helpful debug messages. 1245 1246 Returns: 1247 The list of the produced syntax trees. 1248 """ 1249 return self._parse( 1250 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1251 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
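For example, a two-statement string yields a two-element list (a sketch, assuming the default dialect):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
print(len(trees))      # 2
print(trees[1].sql())  # SELECT 2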
1253 def parse_into( 1254 self, 1255 expression_types: exp.IntoType, 1256 raw_tokens: t.List[Token], 1257 sql: t.Optional[str] = None, 1258 ) -> t.List[t.Optional[exp.Expression]]: 1259 """ 1260 Parses a list of tokens into a given Expression type. If a collection of Expression 1261 types is given instead, this method will try to parse the token list into each one 1262 of them, stopping at the first for which the parsing succeeds. 1263 1264 Args: 1265 expression_types: The expression type(s) to try and parse the token list into. 1266 raw_tokens: The list of tokens. 1267 sql: The original SQL string, used to produce helpful debug messages. 1268 1269 Returns: 1270 The target Expression. 1271 """ 1272 errors = [] 1273 for expression_type in ensure_list(expression_types): 1274 parser = self.EXPRESSION_PARSERS.get(expression_type) 1275 if not parser: 1276 raise TypeError(f"No parser registered for {expression_type}") 1277 1278 try: 1279 return self._parse(parser, raw_tokens, sql) 1280 except ParseError as e: 1281 e.errors[0]["into_expression"] = expression_type 1282 errors.append(e) 1283 1284 raise ParseError( 1285 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1286 errors=merge_errors(errors), 1287 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees, parsed into the target Expression type.
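A sketch of constrained parsing, assuming exp.Select is registered in EXPRESSION_PARSERS; when a collection of types is given, each failed attempt records its into_expression on the error before a merged ParseError is raised:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
# Parse specifically into a SELECT statement rather than an arbitrary statement.
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)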
1327 def check_errors(self) -> None: 1328 """Logs or raises any found errors, depending on the chosen error level setting.""" 1329 if self.error_level == ErrorLevel.WARN: 1330 for error in self.errors: 1331 logger.error(str(error)) 1332 elif self.error_level == ErrorLevel.RAISE and self.errors: 1333 raise ParseError( 1334 concat_messages(self.errors, self.max_errors), 1335 errors=merge_errors(self.errors), 1336 )
Logs or raises any errors found during parsing, depending on the chosen error level setting.
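With ErrorLevel.WARN, for instance, errors accumulate on parser.errors and are logged rather than raised; a sketch in which an unbalanced parenthesis trips the "Expecting )" error seen in _match_r_paren:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
sql = "SELECT (1"
parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logged via logger.error, no exception
print(len(parser.errors) > 0)  # True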
1338 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1339 """ 1340 Appends an error in the list of recorded errors or raises it, depending on the chosen 1341 error level setting. 1342 """ 1343 token = token or self._curr or self._prev or Token.string("") 1344 start = token.start 1345 end = token.end + 1 1346 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1347 highlight = self.sql[start:end] 1348 end_context = self.sql[end : end + self.error_message_context] 1349 1350 error = ParseError.new( 1351 f"{message}. Line {token.line}, Col: {token.col}.\n" 1352 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1353 description=message, 1354 line=token.line, 1355 col=token.col, 1356 start_context=start_context, 1357 highlight=highlight, 1358 end_context=end_context, 1359 ) 1360 1361 if self.error_level == ErrorLevel.IMMEDIATE: 1362 raise error 1363 1364 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
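The structured fields passed to ParseError.new above land on the exception's errors list, so callers can inspect them programmatically (a sketch, using the default ErrorLevel.IMMEDIATE):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    info = e.errors[0]  # description, line, col, highlight, surrounding context
    print(info["description"], info["line"], info["col"], repr(info["highlight"]))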
1366 def expression( 1367 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1368 ) -> E: 1369 """ 1370 Creates a new, validated Expression. 1371 1372 Args: 1373 exp_class: The expression class to instantiate. 1374 comments: An optional list of comments to attach to the expression. 1375 kwargs: The arguments to set for the expression along with their respective values. 1376 1377 Returns: 1378 The target expression. 1379 """ 1380 instance = exp_class(**kwargs) 1381 instance.add_comments(comments) if comments else self._add_comments(instance) 1382 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
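A sketch of the call pattern parser methods use internally, driven here from the outside with helper constructors from sqlglot.expressions:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Builds the node, attaches any pending comments, and validates it in one step.
node = parser.expression(exp.Alias, this=exp.column("a"), alias=exp.to_identifier("b"))
print(node.sql())  # a AS b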
1389 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1390 """ 1391 Validates an Expression, making sure that all its mandatory arguments are set. 1392 1393 Args: 1394 expression: The expression to validate. 1395 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1396 1397 Returns: 1398 The validated expression. 1399 """ 1400 if self.error_level != ErrorLevel.IGNORE: 1401 for error_message in expression.error_messages(args): 1402 self.raise_error(error_message) 1403 1404 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
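For instance, exp.Not declares its "this" argument as mandatory, so validating an empty instance surfaces an error; under the default ErrorLevel.IMMEDIATE this raises at once, while ErrorLevel.RAISE would only record it until check_errors() runs (a sketch):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default
try:
    parser.validate_expression(exp.Not())  # required "this" argument left unset
except ParseError as e:
    print(e)  # e.g. Required keyword: 'this' missing for <class '...Not'>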