sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
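
# Illustrative sketch, not part of the original module: these builders receive
# already-parsed argument lists. Going through the public API (sqlglot.parse_one
# is assumed here), build_mod is what parenthesizes binary operands:
#
#   import sqlglot
#   sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()  # 'SELECT (a + 1) % 7'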


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }
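
    # Illustrative sketch, not part of the original module: dialect parsers extend
    # this registry by subclassing. A hypothetical subclass might register:
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "REVERSE_CONCAT": lambda args: exp.Concat(expressions=args[::-1]),
    #       }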

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
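
    # Illustrative note, not part of the original module: these operator tables feed
    # a precedence-climbing loop in which each tier parses the next-higher tier and
    # then folds matching operators left-associatively, along the lines of:
    #
    #   def _parse_equality(self):
    #       return self._parse_tokens(self._parse_comparison, self.EQUALITY)
    #
    # (_parse_tokens is assumed from the rest of this module, beyond this excerpt.)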

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
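
    # Illustrative sketch, not part of the original module (sqlglot.parse_one is
    # assumed from the public API): each entry turns a postfix column operator into
    # an expression node, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT a::INT")      # TokenType.DCOLON -> exp.Cast
    #   sqlglot.parse_one("SELECT a -> '$.b'")  # TokenType.ARROW  -> exp.JSONExtract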

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }
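
    # Illustrative sketch, not part of the original module: EXPRESSION_PARSERS is
    # the lookup table behind parse_into (defined below), reachable from the public
    # API via the assumed `into` argument, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sqlglot.parse_one("x = 1 AND y = 2", into=exp.Condition)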

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
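
    # Illustrative note, not part of the original module: RANGE_PARSERS fire after
    # the left operand of a range-style predicate has been parsed, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sqlglot.parse_one("x BETWEEN 1 AND 2", into=exp.Condition)  # exp.Between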

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
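
    # Illustrative sketch, not part of the original module (the read= dialect
    # argument is assumed from sqlglot's public API): property keywords following a
    # CREATE header are routed through this table, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (x Int8) ENGINE=MergeTree", read="clickhouse"
    #   )  # "ENGINE" -> exp.EngineProperty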

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
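
    # Illustrative sketch, not part of the original module: column constraints in a
    # schema definition are dispatched through this table, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (x INT PRIMARY KEY)")
    #   # "PRIMARY KEY" -> self._parse_primary_key()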

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
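
    # Illustrative note, not part of the original module: after a SELECT is parsed,
    # these parsers attach trailing clauses onto the query under the returned keys,
    # e.g.
    #
    #   import sqlglot
    #   q = sqlglot.parse_one("SELECT * FROM t WHERE x > 0 LIMIT 1")
    #   q.args["where"], q.args["limit"]  # exp.Where, exp.Limit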

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
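
    # Illustrative note, not part of the original module: OPTIONS_TYPE tables map a
    # leading keyword to the token sequences allowed after it, so
    # TRANSACTION_CHARACTERISTICS accepts e.g. "ISOLATION LEVEL READ COMMITTED" or
    # "READ ONLY". They are consumed along the lines of the USE statement parser
    # above:
    #
    #   self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS, raise_unmatched=False)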

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
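
    # Illustrative usage sketch, not part of the original module (tokenizer access
    # through the dialect is assumed from sqlglot's public API; parse() is defined
    # below):
    #
    #   from sqlglot.dialects import Dialect
    #   dialect = Dialect.get_or_raise("duckdb")
    #   parser = dialect.parser()
    #   parser.parse(dialect.tokenizer.tokenize("SELECT 1"), sql="SELECT 1")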

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
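
    # Illustrative note, not part of the original module: statement dispatch first
    # consults STATEMENT_PARSERS by token type, then the dialect tokenizer's
    # COMMANDS, and finally falls back to expression parsing, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("DROP TABLE t")  # TokenType.DROP -> _parse_drop -> exp.Drop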

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
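
    # Illustrative note, not part of the original module: _parse_exists consumes an
    # optional "IF [NOT] EXISTS" sequence, so for "DROP TABLE IF EXISTS t" the
    # resulting exp.Drop carries exists=True.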
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1667 expression = self._parse_string() 1668 extend_props(self._parse_properties()) 1669 else: 1670 expression = self._parse_statement() 1671 1672 end = self._match_text_seq("END") 1673 1674 if return_: 1675 expression = self.expression(exp.Return, this=expression) 1676 elif create_token.token_type == TokenType.INDEX: 1677 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1678 if not self._match(TokenType.ON): 1679 index = self._parse_id_var() 1680 anonymous = False 1681 else: 1682 index = None 1683 anonymous = True 1684 1685 this = self._parse_index(index=index, anonymous=anonymous) 1686 elif create_token.token_type in self.DB_CREATABLES: 1687 table_parts = self._parse_table_parts( 1688 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1689 ) 1690 1691 # exp.Properties.Location.POST_NAME 1692 self._match(TokenType.COMMA) 1693 extend_props(self._parse_properties(before=True)) 1694 1695 this = self._parse_schema(this=table_parts) 1696 1697 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1698 extend_props(self._parse_properties()) 1699 1700 self._match(TokenType.ALIAS) 1701 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1702 # exp.Properties.Location.POST_ALIAS 1703 extend_props(self._parse_properties()) 1704 1705 if create_token.token_type == TokenType.SEQUENCE: 1706 expression = self._parse_types() 1707 extend_props(self._parse_properties()) 1708 else: 1709 expression = self._parse_ddl_select() 1710 1711 if create_token.token_type == TokenType.TABLE: 1712 # exp.Properties.Location.POST_EXPRESSION 1713 extend_props(self._parse_properties()) 1714 1715 indexes = [] 1716 while True: 1717 index = self._parse_index() 1718 1719 # exp.Properties.Location.POST_INDEX 1720 extend_props(self._parse_properties()) 1721 1722 if not index: 1723 break 1724 else: 1725 self._match(TokenType.COMMA) 1726 indexes.append(index) 1727 elif create_token.token_type == TokenType.VIEW: 1728 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1729 no_schema_binding = True 1730 1731 shallow = self._match_text_seq("SHALLOW") 1732 1733 if self._match_texts(self.CLONE_KEYWORDS): 1734 copy = self._prev.text.lower() == "copy" 1735 clone = self.expression( 1736 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1737 ) 1738 1739 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1740 return self._parse_as_command(start) 1741 1742 return self.expression( 1743 exp.Create, 1744 comments=comments, 1745 this=this, 1746 kind=create_token.text.upper(), 1747 replace=replace, 1748 unique=unique, 1749 expression=expression, 1750 exists=exists, 1751 properties=properties, 1752 indexes=indexes, 1753 no_schema_binding=no_schema_binding, 1754 begin=begin, 1755 end=end, 1756 clone=clone, 1757 ) 1758 1759 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1760 seq = exp.SequenceProperties() 1761 1762 options = [] 1763 index = self._index 1764 1765 while self._curr: 1766 self._match(TokenType.COMMA) 1767 if self._match_text_seq("INCREMENT"): 1768 self._match_text_seq("BY") 1769 self._match_text_seq("=") 1770 seq.set("increment", self._parse_term()) 1771 elif self._match_text_seq("MINVALUE"): 1772 seq.set("minvalue", self._parse_term()) 1773 elif self._match_text_seq("MAXVALUE"): 1774 seq.set("maxvalue", self._parse_term()) 1775 elif self._match(TokenType.START_WITH) or 
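# Example (illustrative sketch): the CLONE_KEYWORDS branch above attaches an
# exp.Clone node to the CREATE statement. Assuming Snowflake's zero-copy clone
# syntax is supported by that dialect, a parse might look like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> e = sqlglot.parse_one("CREATE TABLE t2 CLONE t1", read="snowflake")
#     >>> assert isinstance(e.args["clone"], exp.Clone)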
self._match_text_seq("START"): 1776 self._match_text_seq("=") 1777 seq.set("start", self._parse_term()) 1778 elif self._match_text_seq("CACHE"): 1779 # T-SQL allows empty CACHE which is initialized dynamically 1780 seq.set("cache", self._parse_number() or True) 1781 elif self._match_text_seq("OWNED", "BY"): 1782 # "OWNED BY NONE" is the default 1783 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1784 else: 1785 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1786 if opt: 1787 options.append(opt) 1788 else: 1789 break 1790 1791 seq.set("options", options if options else None) 1792 return None if self._index == index else seq 1793 1794 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1795 # only used for teradata currently 1796 self._match(TokenType.COMMA) 1797 1798 kwargs = { 1799 "no": self._match_text_seq("NO"), 1800 "dual": self._match_text_seq("DUAL"), 1801 "before": self._match_text_seq("BEFORE"), 1802 "default": self._match_text_seq("DEFAULT"), 1803 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1804 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1805 "after": self._match_text_seq("AFTER"), 1806 "minimum": self._match_texts(("MIN", "MINIMUM")), 1807 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1808 } 1809 1810 if self._match_texts(self.PROPERTY_PARSERS): 1811 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1812 try: 1813 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1814 except TypeError: 1815 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1816 1817 return None 1818 1819 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1820 return self._parse_wrapped_csv(self._parse_property) 1821 1822 def _parse_property(self) -> t.Optional[exp.Expression]: 1823 if self._match_texts(self.PROPERTY_PARSERS): 1824 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1825 1826 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1827 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1828 1829 if self._match_text_seq("COMPOUND", "SORTKEY"): 1830 return self._parse_sortkey(compound=True) 1831 1832 if self._match_text_seq("SQL", "SECURITY"): 1833 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1834 1835 index = self._index 1836 key = self._parse_column() 1837 1838 if not self._match(TokenType.EQ): 1839 self._retreat(index) 1840 return self._parse_sequence_properties() 1841 1842 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1843 if isinstance(key, exp.Column): 1844 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1845 1846 value = self._parse_bitwise() or self._parse_var(any_token=True) 1847 1848 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1849 if isinstance(value, exp.Column): 1850 value = exp.var(value.name) 1851 1852 return self.expression(exp.Property, this=key, value=value) 1853 1854 def _parse_stored(self) -> exp.FileFormatProperty: 1855 self._match(TokenType.ALIAS) 1856 1857 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1858 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1859 1860 return self.expression( 1861 exp.FileFormatProperty, 1862 this=( 1863 self.expression( 1864 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1865 ) 1866 if 
input_format or output_format 1867 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1868 ), 1869 ) 1870 1871 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1872 field = self._parse_field() 1873 if isinstance(field, exp.Identifier) and not field.quoted: 1874 field = exp.var(field) 1875 1876 return field 1877 1878 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1879 self._match(TokenType.EQ) 1880 self._match(TokenType.ALIAS) 1881 1882 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1883 1884 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1885 properties = [] 1886 while True: 1887 if before: 1888 prop = self._parse_property_before() 1889 else: 1890 prop = self._parse_property() 1891 if not prop: 1892 break 1893 for p in ensure_list(prop): 1894 properties.append(p) 1895 1896 if properties: 1897 return self.expression(exp.Properties, expressions=properties) 1898 1899 return None 1900 1901 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1902 return self.expression( 1903 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1904 ) 1905 1906 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1907 if self._index >= 2: 1908 pre_volatile_token = self._tokens[self._index - 2] 1909 else: 1910 pre_volatile_token = None 1911 1912 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1913 return exp.VolatileProperty() 1914 1915 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1916 1917 def _parse_retention_period(self) -> exp.Var: 1918 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1919 number = self._parse_number() 1920 number_str = f"{number} " if number else "" 1921 unit = self._parse_var(any_token=True) 1922 return exp.var(f"{number_str}{unit}") 1923 1924 def _parse_system_versioning_property( 1925 self, with_: bool = False 1926 ) -> exp.WithSystemVersioningProperty: 1927 self._match(TokenType.EQ) 1928 prop = self.expression( 1929 exp.WithSystemVersioningProperty, 1930 **{ # type: ignore 1931 "on": True, 1932 "with": with_, 1933 }, 1934 ) 1935 1936 if self._match_text_seq("OFF"): 1937 prop.set("on", False) 1938 return prop 1939 1940 self._match(TokenType.ON) 1941 if self._match(TokenType.L_PAREN): 1942 while self._curr and not self._match(TokenType.R_PAREN): 1943 if self._match_text_seq("HISTORY_TABLE", "="): 1944 prop.set("this", self._parse_table_parts()) 1945 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1946 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1947 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1948 prop.set("retention_period", self._parse_retention_period()) 1949 1950 self._match(TokenType.COMMA) 1951 1952 return prop 1953 1954 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1955 self._match(TokenType.EQ) 1956 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1957 prop = self.expression(exp.DataDeletionProperty, on=on) 1958 1959 if self._match(TokenType.L_PAREN): 1960 while self._curr and not self._match(TokenType.R_PAREN): 1961 if self._match_text_seq("FILTER_COLUMN", "="): 1962 prop.set("filter_column", self._parse_column()) 1963 elif self._match_text_seq("RETENTION_PERIOD", "="): 1964 prop.set("retention_period", self._parse_retention_period()) 1965 1966 
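# Example (illustrative sketch): generic key = value pairs fall through
# _parse_property above into exp.Property nodes. Assuming Postgres storage
# parameters as the input, a parse might look like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> e = sqlglot.parse_one("CREATE TABLE t (a INT) WITH (fillfactor = 70)", read="postgres")
#     >>> props = e.args["properties"].expressions
#     >>> assert any(isinstance(p, exp.Property) for p in props)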
self._match(TokenType.COMMA) 1967 1968 return prop 1969 1970 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1971 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1972 prop = self._parse_system_versioning_property(with_=True) 1973 self._match_r_paren() 1974 return prop 1975 1976 if self._match(TokenType.L_PAREN, advance=False): 1977 return self._parse_wrapped_properties() 1978 1979 if self._match_text_seq("JOURNAL"): 1980 return self._parse_withjournaltable() 1981 1982 if self._match_texts(self.VIEW_ATTRIBUTES): 1983 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1984 1985 if self._match_text_seq("DATA"): 1986 return self._parse_withdata(no=False) 1987 elif self._match_text_seq("NO", "DATA"): 1988 return self._parse_withdata(no=True) 1989 1990 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1991 return self._parse_serde_properties(with_=True) 1992 1993 if not self._next: 1994 return None 1995 1996 return self._parse_withisolatedloading() 1997 1998 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1999 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2000 self._match(TokenType.EQ) 2001 2002 user = self._parse_id_var() 2003 self._match(TokenType.PARAMETER) 2004 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2005 2006 if not user or not host: 2007 return None 2008 2009 return exp.DefinerProperty(this=f"{user}@{host}") 2010 2011 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2012 self._match(TokenType.TABLE) 2013 self._match(TokenType.EQ) 2014 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2015 2016 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2017 return self.expression(exp.LogProperty, no=no) 2018 2019 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2020 return self.expression(exp.JournalProperty, **kwargs) 2021 2022 def _parse_checksum(self) -> exp.ChecksumProperty: 2023 self._match(TokenType.EQ) 2024 2025 on = None 2026 if self._match(TokenType.ON): 2027 on = True 2028 elif self._match_text_seq("OFF"): 2029 on = False 2030 2031 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2032 2033 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2034 return self.expression( 2035 exp.Cluster, 2036 expressions=( 2037 self._parse_wrapped_csv(self._parse_ordered) 2038 if wrapped 2039 else self._parse_csv(self._parse_ordered) 2040 ), 2041 ) 2042 2043 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2044 self._match_text_seq("BY") 2045 2046 self._match_l_paren() 2047 expressions = self._parse_csv(self._parse_column) 2048 self._match_r_paren() 2049 2050 if self._match_text_seq("SORTED", "BY"): 2051 self._match_l_paren() 2052 sorted_by = self._parse_csv(self._parse_ordered) 2053 self._match_r_paren() 2054 else: 2055 sorted_by = None 2056 2057 self._match(TokenType.INTO) 2058 buckets = self._parse_number() 2059 self._match_text_seq("BUCKETS") 2060 2061 return self.expression( 2062 exp.ClusteredByProperty, 2063 expressions=expressions, 2064 sorted_by=sorted_by, 2065 buckets=buckets, 2066 ) 2067 2068 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2069 if not self._match_text_seq("GRANTS"): 2070 self._retreat(self._index - 1) 2071 return None 2072 2073 return self.expression(exp.CopyGrantsProperty) 2074 2075 def _parse_freespace(self) -> exp.FreespaceProperty: 2076 self._match(TokenType.EQ) 2077 return 
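# Example (illustrative sketch): _parse_clustered_by above models Hive-style
# bucketing. Assuming the Hive dialect, a parse might look like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "CREATE TABLE t (a INT) CLUSTERED BY (a) SORTED BY (a) INTO 4 BUCKETS"
#     >>> e = sqlglot.parse_one(sql, read="hive")
#     >>> props = e.args["properties"].expressions
#     >>> assert any(isinstance(p, exp.ClusteredByProperty) for p in props)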
self.expression( 2078 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2079 ) 2080 2081 def _parse_mergeblockratio( 2082 self, no: bool = False, default: bool = False 2083 ) -> exp.MergeBlockRatioProperty: 2084 if self._match(TokenType.EQ): 2085 return self.expression( 2086 exp.MergeBlockRatioProperty, 2087 this=self._parse_number(), 2088 percent=self._match(TokenType.PERCENT), 2089 ) 2090 2091 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2092 2093 def _parse_datablocksize( 2094 self, 2095 default: t.Optional[bool] = None, 2096 minimum: t.Optional[bool] = None, 2097 maximum: t.Optional[bool] = None, 2098 ) -> exp.DataBlocksizeProperty: 2099 self._match(TokenType.EQ) 2100 size = self._parse_number() 2101 2102 units = None 2103 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2104 units = self._prev.text 2105 2106 return self.expression( 2107 exp.DataBlocksizeProperty, 2108 size=size, 2109 units=units, 2110 default=default, 2111 minimum=minimum, 2112 maximum=maximum, 2113 ) 2114 2115 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2116 self._match(TokenType.EQ) 2117 always = self._match_text_seq("ALWAYS") 2118 manual = self._match_text_seq("MANUAL") 2119 never = self._match_text_seq("NEVER") 2120 default = self._match_text_seq("DEFAULT") 2121 2122 autotemp = None 2123 if self._match_text_seq("AUTOTEMP"): 2124 autotemp = self._parse_schema() 2125 2126 return self.expression( 2127 exp.BlockCompressionProperty, 2128 always=always, 2129 manual=manual, 2130 never=never, 2131 default=default, 2132 autotemp=autotemp, 2133 ) 2134 2135 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2136 index = self._index 2137 no = self._match_text_seq("NO") 2138 concurrent = self._match_text_seq("CONCURRENT") 2139 2140 if not self._match_text_seq("ISOLATED", "LOADING"): 2141 self._retreat(index) 2142 return None 2143 2144 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2145 return self.expression( 2146 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2147 ) 2148 2149 def _parse_locking(self) -> exp.LockingProperty: 2150 if self._match(TokenType.TABLE): 2151 kind = "TABLE" 2152 elif self._match(TokenType.VIEW): 2153 kind = "VIEW" 2154 elif self._match(TokenType.ROW): 2155 kind = "ROW" 2156 elif self._match_text_seq("DATABASE"): 2157 kind = "DATABASE" 2158 else: 2159 kind = None 2160 2161 if kind in ("DATABASE", "TABLE", "VIEW"): 2162 this = self._parse_table_parts() 2163 else: 2164 this = None 2165 2166 if self._match(TokenType.FOR): 2167 for_or_in = "FOR" 2168 elif self._match(TokenType.IN): 2169 for_or_in = "IN" 2170 else: 2171 for_or_in = None 2172 2173 if self._match_text_seq("ACCESS"): 2174 lock_type = "ACCESS" 2175 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2176 lock_type = "EXCLUSIVE" 2177 elif self._match_text_seq("SHARE"): 2178 lock_type = "SHARE" 2179 elif self._match_text_seq("READ"): 2180 lock_type = "READ" 2181 elif self._match_text_seq("WRITE"): 2182 lock_type = "WRITE" 2183 elif self._match_text_seq("CHECKSUM"): 2184 lock_type = "CHECKSUM" 2185 else: 2186 lock_type = None 2187 2188 override = self._match_text_seq("OVERRIDE") 2189 2190 return self.expression( 2191 exp.LockingProperty, 2192 this=this, 2193 kind=kind, 2194 for_or_in=for_or_in, 2195 lock_type=lock_type, 2196 override=override, 2197 ) 2198 2199 def _parse_partition_by(self) -> t.List[exp.Expression]: 2200 if 
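# Example (illustrative sketch): _parse_locking above models Teradata's LOCKING
# modifier. Assuming the Teradata dialect round-trips it, a check might look
# like this; the exact generated spelling is an assumption.
#
#     >>> import sqlglot
#     >>> e = sqlglot.parse_one("LOCKING ROW FOR ACCESS SELECT a FROM b", read="teradata")
#     >>> assert "LOCKING ROW FOR ACCESS" in e.sql(dialect="teradata")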
self._match(TokenType.PARTITION_BY): 2201 return self._parse_csv(self._parse_assignment) 2202 return [] 2203 2204 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2205 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2206 if self._match_text_seq("MINVALUE"): 2207 return exp.var("MINVALUE") 2208 if self._match_text_seq("MAXVALUE"): 2209 return exp.var("MAXVALUE") 2210 return self._parse_bitwise() 2211 2212 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2213 expression = None 2214 from_expressions = None 2215 to_expressions = None 2216 2217 if self._match(TokenType.IN): 2218 this = self._parse_wrapped_csv(self._parse_bitwise) 2219 elif self._match(TokenType.FROM): 2220 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2221 self._match_text_seq("TO") 2222 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2223 elif self._match_text_seq("WITH", "(", "MODULUS"): 2224 this = self._parse_number() 2225 self._match_text_seq(",", "REMAINDER") 2226 expression = self._parse_number() 2227 self._match_r_paren() 2228 else: 2229 self.raise_error("Failed to parse partition bound spec.") 2230 2231 return self.expression( 2232 exp.PartitionBoundSpec, 2233 this=this, 2234 expression=expression, 2235 from_expressions=from_expressions, 2236 to_expressions=to_expressions, 2237 ) 2238 2239 # https://www.postgresql.org/docs/current/sql-createtable.html 2240 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2241 if not self._match_text_seq("OF"): 2242 self._retreat(self._index - 1) 2243 return None 2244 2245 this = self._parse_table(schema=True) 2246 2247 if self._match(TokenType.DEFAULT): 2248 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2249 elif self._match_text_seq("FOR", "VALUES"): 2250 expression = self._parse_partition_bound_spec() 2251 else: 2252 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2253 2254 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2255 2256 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2257 self._match(TokenType.EQ) 2258 return self.expression( 2259 exp.PartitionedByProperty, 2260 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2261 ) 2262 2263 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2264 if self._match_text_seq("AND", "STATISTICS"): 2265 statistics = True 2266 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2267 statistics = False 2268 else: 2269 statistics = None 2270 2271 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2272 2273 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2274 if self._match_text_seq("SQL"): 2275 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2276 return None 2277 2278 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2279 if self._match_text_seq("SQL", "DATA"): 2280 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2281 return None 2282 2283 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2284 if self._match_text_seq("PRIMARY", "INDEX"): 2285 return exp.NoPrimaryIndexProperty() 2286 if self._match_text_seq("SQL"): 2287 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2288 return None 2289 2290 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2291 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2292 return exp.OnCommitProperty() 2293 
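# Example (illustrative sketch): _parse_partitioned_of above covers Postgres
# declarative partitioning (see the linked CREATE TABLE docs). A parse of a
# range partition might look like this; the AST shape is an assumption.
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "CREATE TABLE m PARTITION OF p FOR VALUES FROM (1) TO (10)"
#     >>> e = sqlglot.parse_one(sql, read="postgres")
#     >>> props = e.args["properties"].expressions
#     >>> assert any(isinstance(p, exp.PartitionedOfProperty) for p in props)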
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2294 return exp.OnCommitProperty(delete=True) 2295 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2296 2297 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2298 if self._match_text_seq("SQL", "DATA"): 2299 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2300 return None 2301 2302 def _parse_distkey(self) -> exp.DistKeyProperty: 2303 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2304 2305 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2306 table = self._parse_table(schema=True) 2307 2308 options = [] 2309 while self._match_texts(("INCLUDING", "EXCLUDING")): 2310 this = self._prev.text.upper() 2311 2312 id_var = self._parse_id_var() 2313 if not id_var: 2314 return None 2315 2316 options.append( 2317 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2318 ) 2319 2320 return self.expression(exp.LikeProperty, this=table, expressions=options) 2321 2322 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2323 return self.expression( 2324 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2325 ) 2326 2327 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2328 self._match(TokenType.EQ) 2329 return self.expression( 2330 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2331 ) 2332 2333 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2334 self._match_text_seq("WITH", "CONNECTION") 2335 return self.expression( 2336 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2337 ) 2338 2339 def _parse_returns(self) -> exp.ReturnsProperty: 2340 value: t.Optional[exp.Expression] 2341 null = None 2342 is_table = self._match(TokenType.TABLE) 2343 2344 if is_table: 2345 if self._match(TokenType.LT): 2346 value = self.expression( 2347 exp.Schema, 2348 this="TABLE", 2349 expressions=self._parse_csv(self._parse_struct_types), 2350 ) 2351 if not self._match(TokenType.GT): 2352 self.raise_error("Expecting >") 2353 else: 2354 value = self._parse_schema(exp.var("TABLE")) 2355 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2356 null = True 2357 value = None 2358 else: 2359 value = self._parse_types() 2360 2361 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2362 2363 def _parse_describe(self) -> exp.Describe: 2364 kind = self._match_set(self.CREATABLES) and self._prev.text 2365 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2366 if self._match(TokenType.DOT): 2367 style = None 2368 self._retreat(self._index - 2) 2369 this = self._parse_table(schema=True) 2370 properties = self._parse_properties() 2371 expressions = properties.expressions if properties else None 2372 return self.expression( 2373 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2374 ) 2375 2376 def _parse_insert(self) -> exp.Insert: 2377 comments = ensure_list(self._prev_comments) 2378 hint = self._parse_hint() 2379 overwrite = self._match(TokenType.OVERWRITE) 2380 ignore = self._match(TokenType.IGNORE) 2381 local = self._match_text_seq("LOCAL") 2382 alternative = None 2383 is_function = None 2384 2385 if self._match_text_seq("DIRECTORY"): 2386 this: t.Optional[exp.Expression] = self.expression( 2387 exp.Directory, 2388 this=self._parse_var_or_string(), 2389 
local=local, 2390 row_format=self._parse_row_format(match_row=True), 2391 ) 2392 else: 2393 if self._match(TokenType.OR): 2394 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2395 2396 self._match(TokenType.INTO) 2397 comments += ensure_list(self._prev_comments) 2398 self._match(TokenType.TABLE) 2399 is_function = self._match(TokenType.FUNCTION) 2400 2401 this = ( 2402 self._parse_table(schema=True, parse_partition=True) 2403 if not is_function 2404 else self._parse_function() 2405 ) 2406 2407 returning = self._parse_returning() 2408 2409 return self.expression( 2410 exp.Insert, 2411 comments=comments, 2412 hint=hint, 2413 is_function=is_function, 2414 this=this, 2415 stored=self._match_text_seq("STORED") and self._parse_stored(), 2416 by_name=self._match_text_seq("BY", "NAME"), 2417 exists=self._parse_exists(), 2418 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2419 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2420 conflict=self._parse_on_conflict(), 2421 returning=returning or self._parse_returning(), 2422 overwrite=overwrite, 2423 alternative=alternative, 2424 ignore=ignore, 2425 ) 2426 2427 def _parse_kill(self) -> exp.Kill: 2428 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2429 2430 return self.expression( 2431 exp.Kill, 2432 this=self._parse_primary(), 2433 kind=kind, 2434 ) 2435 2436 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2437 conflict = self._match_text_seq("ON", "CONFLICT") 2438 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2439 2440 if not conflict and not duplicate: 2441 return None 2442 2443 conflict_keys = None 2444 constraint = None 2445 2446 if conflict: 2447 if self._match_text_seq("ON", "CONSTRAINT"): 2448 constraint = self._parse_id_var() 2449 elif self._match(TokenType.L_PAREN): 2450 conflict_keys = self._parse_csv(self._parse_id_var) 2451 self._match_r_paren() 2452 2453 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2454 if self._prev.token_type == TokenType.UPDATE: 2455 self._match(TokenType.SET) 2456 expressions = self._parse_csv(self._parse_equality) 2457 else: 2458 expressions = None 2459 2460 return self.expression( 2461 exp.OnConflict, 2462 duplicate=duplicate, 2463 expressions=expressions, 2464 action=action, 2465 conflict_keys=conflict_keys, 2466 constraint=constraint, 2467 ) 2468 2469 def _parse_returning(self) -> t.Optional[exp.Returning]: 2470 if not self._match(TokenType.RETURNING): 2471 return None 2472 return self.expression( 2473 exp.Returning, 2474 expressions=self._parse_csv(self._parse_expression), 2475 into=self._match(TokenType.INTO) and self._parse_table_part(), 2476 ) 2477 2478 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2479 if not self._match(TokenType.FORMAT): 2480 return None 2481 return self._parse_row_format() 2482 2483 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2484 index = self._index 2485 with_ = with_ or self._match_text_seq("WITH") 2486 2487 if not self._match(TokenType.SERDE_PROPERTIES): 2488 self._retreat(index) 2489 return None 2490 return self.expression( 2491 exp.SerdeProperties, 2492 **{ # type: ignore 2493 "expressions": self._parse_wrapped_properties(), 2494 "with": with_, 2495 }, 2496 ) 2497 2498 def _parse_row_format( 2499 self, match_row: bool = False 2500 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2501 
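# Example (illustrative sketch): _parse_on_conflict above handles both the
# Postgres ON CONFLICT and MySQL ON DUPLICATE KEY forms. Assuming Postgres
# syntax, a parse might look like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING"
#     >>> e = sqlglot.parse_one(sql, read="postgres")
#     >>> assert isinstance(e.args["conflict"], exp.OnConflict)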
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2502 return None 2503 2504 if self._match_text_seq("SERDE"): 2505 this = self._parse_string() 2506 2507 serde_properties = self._parse_serde_properties() 2508 2509 return self.expression( 2510 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2511 ) 2512 2513 self._match_text_seq("DELIMITED") 2514 2515 kwargs = {} 2516 2517 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2518 kwargs["fields"] = self._parse_string() 2519 if self._match_text_seq("ESCAPED", "BY"): 2520 kwargs["escaped"] = self._parse_string() 2521 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2522 kwargs["collection_items"] = self._parse_string() 2523 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2524 kwargs["map_keys"] = self._parse_string() 2525 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2526 kwargs["lines"] = self._parse_string() 2527 if self._match_text_seq("NULL", "DEFINED", "AS"): 2528 kwargs["null"] = self._parse_string() 2529 2530 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2531 2532 def _parse_load(self) -> exp.LoadData | exp.Command: 2533 if self._match_text_seq("DATA"): 2534 local = self._match_text_seq("LOCAL") 2535 self._match_text_seq("INPATH") 2536 inpath = self._parse_string() 2537 overwrite = self._match(TokenType.OVERWRITE) 2538 self._match_pair(TokenType.INTO, TokenType.TABLE) 2539 2540 return self.expression( 2541 exp.LoadData, 2542 this=self._parse_table(schema=True), 2543 local=local, 2544 overwrite=overwrite, 2545 inpath=inpath, 2546 partition=self._parse_partition(), 2547 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2548 serde=self._match_text_seq("SERDE") and self._parse_string(), 2549 ) 2550 return self._parse_as_command(self._prev) 2551 2552 def _parse_delete(self) -> exp.Delete: 2553 # This handles MySQL's "Multiple-Table Syntax" 2554 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2555 tables = None 2556 comments = self._prev_comments 2557 if not self._match(TokenType.FROM, advance=False): 2558 tables = self._parse_csv(self._parse_table) or None 2559 2560 returning = self._parse_returning() 2561 2562 return self.expression( 2563 exp.Delete, 2564 comments=comments, 2565 tables=tables, 2566 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2567 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2568 where=self._parse_where(), 2569 returning=returning or self._parse_returning(), 2570 limit=self._parse_limit(), 2571 ) 2572 2573 def _parse_update(self) -> exp.Update: 2574 comments = self._prev_comments 2575 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2576 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2577 returning = self._parse_returning() 2578 return self.expression( 2579 exp.Update, 2580 comments=comments, 2581 **{ # type: ignore 2582 "this": this, 2583 "expressions": expressions, 2584 "from": self._parse_from(joins=True), 2585 "where": self._parse_where(), 2586 "returning": returning or self._parse_returning(), 2587 "order": self._parse_order(), 2588 "limit": self._parse_limit(), 2589 }, 2590 ) 2591 2592 def _parse_uncache(self) -> exp.Uncache: 2593 if not self._match(TokenType.TABLE): 2594 self.raise_error("Expecting TABLE after UNCACHE") 2595 2596 return self.expression( 2597 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2598 ) 2599 2600 def 
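# Example (illustrative sketch): _parse_delete above threads USING, WHERE and
# RETURNING into exp.Delete. Assuming Postgres syntax, a parse might look
# like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "DELETE FROM t USING u WHERE t.id = u.id RETURNING t.id"
#     >>> e = sqlglot.parse_one(sql, read="postgres")
#     >>> assert isinstance(e, exp.Delete) and e.args["returning"] is not None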
_parse_cache(self) -> exp.Cache: 2601 lazy = self._match_text_seq("LAZY") 2602 self._match(TokenType.TABLE) 2603 table = self._parse_table(schema=True) 2604 2605 options = [] 2606 if self._match_text_seq("OPTIONS"): 2607 self._match_l_paren() 2608 k = self._parse_string() 2609 self._match(TokenType.EQ) 2610 v = self._parse_string() 2611 options = [k, v] 2612 self._match_r_paren() 2613 2614 self._match(TokenType.ALIAS) 2615 return self.expression( 2616 exp.Cache, 2617 this=table, 2618 lazy=lazy, 2619 options=options, 2620 expression=self._parse_select(nested=True), 2621 ) 2622 2623 def _parse_partition(self) -> t.Optional[exp.Partition]: 2624 if not self._match(TokenType.PARTITION): 2625 return None 2626 2627 return self.expression( 2628 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2629 ) 2630 2631 def _parse_value(self) -> t.Optional[exp.Tuple]: 2632 if self._match(TokenType.L_PAREN): 2633 expressions = self._parse_csv(self._parse_expression) 2634 self._match_r_paren() 2635 return self.expression(exp.Tuple, expressions=expressions) 2636 2637 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2638 expression = self._parse_expression() 2639 if expression: 2640 return self.expression(exp.Tuple, expressions=[expression]) 2641 return None 2642 2643 def _parse_projections(self) -> t.List[exp.Expression]: 2644 return self._parse_expressions() 2645 2646 def _parse_select( 2647 self, 2648 nested: bool = False, 2649 table: bool = False, 2650 parse_subquery_alias: bool = True, 2651 parse_set_operation: bool = True, 2652 ) -> t.Optional[exp.Expression]: 2653 cte = self._parse_with() 2654 2655 if cte: 2656 this = self._parse_statement() 2657 2658 if not this: 2659 self.raise_error("Failed to parse any statement following CTE") 2660 return cte 2661 2662 if "with" in this.arg_types: 2663 this.set("with", cte) 2664 else: 2665 self.raise_error(f"{this.key} does not support CTE") 2666 this = cte 2667 2668 return this 2669 2670 # duckdb supports leading with FROM x 2671 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2672 2673 if self._match(TokenType.SELECT): 2674 comments = self._prev_comments 2675 2676 hint = self._parse_hint() 2677 all_ = self._match(TokenType.ALL) 2678 distinct = self._match_set(self.DISTINCT_TOKENS) 2679 2680 kind = ( 2681 self._match(TokenType.ALIAS) 2682 and self._match_texts(("STRUCT", "VALUE")) 2683 and self._prev.text.upper() 2684 ) 2685 2686 if distinct: 2687 distinct = self.expression( 2688 exp.Distinct, 2689 on=self._parse_value() if self._match(TokenType.ON) else None, 2690 ) 2691 2692 if all_ and distinct: 2693 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2694 2695 limit = self._parse_limit(top=True) 2696 projections = self._parse_projections() 2697 2698 this = self.expression( 2699 exp.Select, 2700 kind=kind, 2701 hint=hint, 2702 distinct=distinct, 2703 expressions=projections, 2704 limit=limit, 2705 ) 2706 this.comments = comments 2707 2708 into = self._parse_into() 2709 if into: 2710 this.set("into", into) 2711 2712 if not from_: 2713 from_ = self._parse_from() 2714 2715 if from_: 2716 this.set("from", from_) 2717 2718 this = self._parse_query_modifiers(this) 2719 elif (table or nested) and self._match(TokenType.L_PAREN): 2720 if self._match(TokenType.PIVOT): 2721 this = self._parse_simplified_pivot() 2722 elif self._match(TokenType.FROM): 2723 this = exp.select("*").from_( 2724 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2725 ) 2726 else: 2727 
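# Example (illustrative sketch): the leading-FROM branch in _parse_select below
# expands DuckDB's "FROM x" shorthand into a full SELECT, per the
# exp.select("*").from_(...) call in that method. Assuming the DuckDB dialect:
#
#     >>> import sqlglot
#     >>> e = sqlglot.parse_one("FROM tbl", read="duckdb")
#     >>> assert e.sql() == "SELECT * FROM tbl"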
this = ( 2728 self._parse_table() 2729 if table 2730 else self._parse_select(nested=True, parse_set_operation=False) 2731 ) 2732 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2733 2734 self._match_r_paren() 2735 2736 # We return early here so that the UNION isn't attached to the subquery by the 2737 # following call to _parse_set_operations, but instead becomes the parent node 2738 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2739 elif self._match(TokenType.VALUES, advance=False): 2740 this = self._parse_derived_table_values() 2741 elif from_: 2742 this = exp.select("*").from_(from_.this, copy=False) 2743 else: 2744 this = None 2745 2746 if parse_set_operation: 2747 return self._parse_set_operations(this) 2748 return this 2749 2750 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2751 if not skip_with_token and not self._match(TokenType.WITH): 2752 return None 2753 2754 comments = self._prev_comments 2755 recursive = self._match(TokenType.RECURSIVE) 2756 2757 expressions = [] 2758 while True: 2759 expressions.append(self._parse_cte()) 2760 2761 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2762 break 2763 else: 2764 self._match(TokenType.WITH) 2765 2766 return self.expression( 2767 exp.With, comments=comments, expressions=expressions, recursive=recursive 2768 ) 2769 2770 def _parse_cte(self) -> exp.CTE: 2771 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2772 if not alias or not alias.this: 2773 self.raise_error("Expected CTE to have alias") 2774 2775 self._match(TokenType.ALIAS) 2776 2777 if self._match_text_seq("NOT", "MATERIALIZED"): 2778 materialized = False 2779 elif self._match_text_seq("MATERIALIZED"): 2780 materialized = True 2781 else: 2782 materialized = None 2783 2784 return self.expression( 2785 exp.CTE, 2786 this=self._parse_wrapped(self._parse_statement), 2787 alias=alias, 2788 materialized=materialized, 2789 ) 2790 2791 def _parse_table_alias( 2792 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2793 ) -> t.Optional[exp.TableAlias]: 2794 any_token = self._match(TokenType.ALIAS) 2795 alias = ( 2796 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2797 or self._parse_string_as_identifier() 2798 ) 2799 2800 index = self._index 2801 if self._match(TokenType.L_PAREN): 2802 columns = self._parse_csv(self._parse_function_parameter) 2803 self._match_r_paren() if columns else self._retreat(index) 2804 else: 2805 columns = None 2806 2807 if not alias and not columns: 2808 return None 2809 2810 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2811 2812 # We bubble up comments from the Identifier to the TableAlias 2813 if isinstance(alias, exp.Identifier): 2814 table_alias.add_comments(alias.pop_comments()) 2815 2816 return table_alias 2817 2818 def _parse_subquery( 2819 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2820 ) -> t.Optional[exp.Subquery]: 2821 if not this: 2822 return None 2823 2824 return self.expression( 2825 exp.Subquery, 2826 this=this, 2827 pivots=self._parse_pivots(), 2828 alias=self._parse_table_alias() if parse_alias else None, 2829 ) 2830 2831 def _implicit_unnests_to_explicit(self, this: E) -> E: 2832 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2833 2834 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2835 for i, join in enumerate(this.args.get("joins") or []): 2836 table = join.this 2837 
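# Example (illustrative sketch): _parse_cte below records the optional
# [NOT] MATERIALIZED qualifier on each CTE. Assuming Postgres syntax, a parse
# might look like this:
#
#     >>> import sqlglot
#     >>> sql = "WITH c AS MATERIALIZED (SELECT 1) SELECT * FROM c"
#     >>> e = sqlglot.parse_one(sql, read="postgres")
#     >>> cte = e.args["with"].expressions[0]
#     >>> assert cte.args["materialized"] is True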
normalized_table = table.copy() 2838 normalized_table.meta["maybe_column"] = True 2839 normalized_table = _norm(normalized_table, dialect=self.dialect) 2840 2841 if isinstance(table, exp.Table) and not join.args.get("on"): 2842 if normalized_table.parts[0].name in refs: 2843 table_as_column = table.to_column() 2844 unnest = exp.Unnest(expressions=[table_as_column]) 2845 2846 # Table.to_column creates a parent Alias node that we want to convert to 2847 # a TableAlias and attach to the Unnest, so it matches the parser's output 2848 if isinstance(table.args.get("alias"), exp.TableAlias): 2849 table_as_column.replace(table_as_column.this) 2850 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2851 2852 table.replace(unnest) 2853 2854 refs.add(normalized_table.alias_or_name) 2855 2856 return this 2857 2858 def _parse_query_modifiers( 2859 self, this: t.Optional[exp.Expression] 2860 ) -> t.Optional[exp.Expression]: 2861 if isinstance(this, (exp.Query, exp.Table)): 2862 for join in self._parse_joins(): 2863 this.append("joins", join) 2864 for lateral in iter(self._parse_lateral, None): 2865 this.append("laterals", lateral) 2866 2867 while True: 2868 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2869 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2870 key, expression = parser(self) 2871 2872 if expression: 2873 this.set(key, expression) 2874 if key == "limit": 2875 offset = expression.args.pop("offset", None) 2876 2877 if offset: 2878 offset = exp.Offset(expression=offset) 2879 this.set("offset", offset) 2880 2881 limit_by_expressions = expression.expressions 2882 expression.set("expressions", None) 2883 offset.set("expressions", limit_by_expressions) 2884 continue 2885 break 2886 2887 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2888 this = self._implicit_unnests_to_explicit(this) 2889 2890 return this 2891 2892 def _parse_hint(self) -> t.Optional[exp.Hint]: 2893 if self._match(TokenType.HINT): 2894 hints = [] 2895 for hint in iter( 2896 lambda: self._parse_csv( 2897 lambda: self._parse_function() or self._parse_var(upper=True) 2898 ), 2899 [], 2900 ): 2901 hints.extend(hint) 2902 2903 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2904 self.raise_error("Expected */ after HINT") 2905 2906 return self.expression(exp.Hint, expressions=hints) 2907 2908 return None 2909 2910 def _parse_into(self) -> t.Optional[exp.Into]: 2911 if not self._match(TokenType.INTO): 2912 return None 2913 2914 temp = self._match(TokenType.TEMPORARY) 2915 unlogged = self._match_text_seq("UNLOGGED") 2916 self._match(TokenType.TABLE) 2917 2918 return self.expression( 2919 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2920 ) 2921 2922 def _parse_from( 2923 self, joins: bool = False, skip_from_token: bool = False 2924 ) -> t.Optional[exp.From]: 2925 if not skip_from_token and not self._match(TokenType.FROM): 2926 return None 2927 2928 return self.expression( 2929 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2930 ) 2931 2932 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2933 return self.expression( 2934 exp.MatchRecognizeMeasure, 2935 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2936 this=self._parse_expression(), 2937 ) 2938 2939 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2940 if not self._match(TokenType.MATCH_RECOGNIZE): 2941 return None 2942 2943 self._match_l_paren() 2944 2945 partition = 
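# Example (illustrative sketch): _parse_hint above consumes /*+ ... */ blocks
# into exp.Hint. Assuming Spark-style join hints are enabled for that dialect,
# a parse might look like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "SELECT /*+ BROADCAST(b) */ * FROM a JOIN b ON a.id = b.id"
#     >>> e = sqlglot.parse_one(sql, read="spark")
#     >>> assert isinstance(e.args["hint"], exp.Hint)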
self._parse_partition_by() 2946 order = self._parse_order() 2947 2948 measures = ( 2949 self._parse_csv(self._parse_match_recognize_measure) 2950 if self._match_text_seq("MEASURES") 2951 else None 2952 ) 2953 2954 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2955 rows = exp.var("ONE ROW PER MATCH") 2956 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2957 text = "ALL ROWS PER MATCH" 2958 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2959 text += " SHOW EMPTY MATCHES" 2960 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2961 text += " OMIT EMPTY MATCHES" 2962 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2963 text += " WITH UNMATCHED ROWS" 2964 rows = exp.var(text) 2965 else: 2966 rows = None 2967 2968 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2969 text = "AFTER MATCH SKIP" 2970 if self._match_text_seq("PAST", "LAST", "ROW"): 2971 text += " PAST LAST ROW" 2972 elif self._match_text_seq("TO", "NEXT", "ROW"): 2973 text += " TO NEXT ROW" 2974 elif self._match_text_seq("TO", "FIRST"): 2975 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2976 elif self._match_text_seq("TO", "LAST"): 2977 text += f" TO LAST {self._advance_any().text}" # type: ignore 2978 after = exp.var(text) 2979 else: 2980 after = None 2981 2982 if self._match_text_seq("PATTERN"): 2983 self._match_l_paren() 2984 2985 if not self._curr: 2986 self.raise_error("Expecting )", self._curr) 2987 2988 paren = 1 2989 start = self._curr 2990 2991 while self._curr and paren > 0: 2992 if self._curr.token_type == TokenType.L_PAREN: 2993 paren += 1 2994 if self._curr.token_type == TokenType.R_PAREN: 2995 paren -= 1 2996 2997 end = self._prev 2998 self._advance() 2999 3000 if paren > 0: 3001 self.raise_error("Expecting )", self._curr) 3002 3003 pattern = exp.var(self._find_sql(start, end)) 3004 else: 3005 pattern = None 3006 3007 define = ( 3008 self._parse_csv(self._parse_name_as_expression) 3009 if self._match_text_seq("DEFINE") 3010 else None 3011 ) 3012 3013 self._match_r_paren() 3014 3015 return self.expression( 3016 exp.MatchRecognize, 3017 partition_by=partition, 3018 order=order, 3019 measures=measures, 3020 rows=rows, 3021 after=after, 3022 pattern=pattern, 3023 define=define, 3024 alias=self._parse_table_alias(), 3025 ) 3026 3027 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3028 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3029 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3030 cross_apply = False 3031 3032 if cross_apply is not None: 3033 this = self._parse_select(table=True) 3034 view = None 3035 outer = None 3036 elif self._match(TokenType.LATERAL): 3037 this = self._parse_select(table=True) 3038 view = self._match(TokenType.VIEW) 3039 outer = self._match(TokenType.OUTER) 3040 else: 3041 return None 3042 3043 if not this: 3044 this = ( 3045 self._parse_unnest() 3046 or self._parse_function() 3047 or self._parse_id_var(any_token=False) 3048 ) 3049 3050 while self._match(TokenType.DOT): 3051 this = exp.Dot( 3052 this=this, 3053 expression=self._parse_function() or self._parse_id_var(any_token=False), 3054 ) 3055 3056 if view: 3057 table = self._parse_id_var(any_token=False) 3058 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3059 table_alias: t.Optional[exp.TableAlias] = self.expression( 3060 exp.TableAlias, this=table, columns=columns 3061 ) 3062 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3063 # We move the alias from the lateral's child node to 
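# Example (illustrative sketch): _parse_lateral above also covers T-SQL's
# CROSS/OUTER APPLY via the cross_apply flag. Assuming the T-SQL dialect
# round-trips APPLY, a check might look like this:
#
#     >>> import sqlglot
#     >>> e = sqlglot.parse_one("SELECT * FROM t CROSS APPLY fn(t.x)", read="tsql")
#     >>> assert "CROSS APPLY" in e.sql(dialect="tsql")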
the lateral itself 3064 table_alias = this.args["alias"].pop() 3065 else: 3066 table_alias = self._parse_table_alias() 3067 3068 return self.expression( 3069 exp.Lateral, 3070 this=this, 3071 view=view, 3072 outer=outer, 3073 alias=table_alias, 3074 cross_apply=cross_apply, 3075 ) 3076 3077 def _parse_join_parts( 3078 self, 3079 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3080 return ( 3081 self._match_set(self.JOIN_METHODS) and self._prev, 3082 self._match_set(self.JOIN_SIDES) and self._prev, 3083 self._match_set(self.JOIN_KINDS) and self._prev, 3084 ) 3085 3086 def _parse_join( 3087 self, skip_join_token: bool = False, parse_bracket: bool = False 3088 ) -> t.Optional[exp.Join]: 3089 if self._match(TokenType.COMMA): 3090 return self.expression(exp.Join, this=self._parse_table()) 3091 3092 index = self._index 3093 method, side, kind = self._parse_join_parts() 3094 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3095 join = self._match(TokenType.JOIN) 3096 3097 if not skip_join_token and not join: 3098 self._retreat(index) 3099 kind = None 3100 method = None 3101 side = None 3102 3103 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3104 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3105 3106 if not skip_join_token and not join and not outer_apply and not cross_apply: 3107 return None 3108 3109 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3110 3111 if method: 3112 kwargs["method"] = method.text 3113 if side: 3114 kwargs["side"] = side.text 3115 if kind: 3116 kwargs["kind"] = kind.text 3117 if hint: 3118 kwargs["hint"] = hint 3119 3120 if self._match(TokenType.MATCH_CONDITION): 3121 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3122 3123 if self._match(TokenType.ON): 3124 kwargs["on"] = self._parse_assignment() 3125 elif self._match(TokenType.USING): 3126 kwargs["using"] = self._parse_wrapped_id_vars() 3127 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3128 kind and kind.token_type == TokenType.CROSS 3129 ): 3130 index = self._index 3131 joins: t.Optional[list] = list(self._parse_joins()) 3132 3133 if joins and self._match(TokenType.ON): 3134 kwargs["on"] = self._parse_assignment() 3135 elif joins and self._match(TokenType.USING): 3136 kwargs["using"] = self._parse_wrapped_id_vars() 3137 else: 3138 joins = None 3139 self._retreat(index) 3140 3141 kwargs["this"].set("joins", joins if joins else None) 3142 3143 comments = [c for token in (method, side, kind) if token for c in token.comments] 3144 return self.expression(exp.Join, comments=comments, **kwargs) 3145 3146 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3147 this = self._parse_assignment() 3148 3149 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3150 return this 3151 3152 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3153 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3154 3155 return this 3156 3157 def _parse_index_params(self) -> exp.IndexParameters: 3158 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3159 3160 if self._match(TokenType.L_PAREN, advance=False): 3161 columns = self._parse_wrapped_csv(self._parse_with_operator) 3162 else: 3163 columns = None 3164 3165 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3166 partition_by = self._parse_partition_by() 3167 with_storage = 
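# Example (illustrative sketch): _parse_join above stores USING column lists on
# the join node. With the default dialect, a parse might look like this:
#
#     >>> import sqlglot
#     >>> e = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)")
#     >>> join = e.args["joins"][0]
#     >>> assert join.args["using"]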
self._match(TokenType.WITH) and self._parse_wrapped_properties() 3168 tablespace = ( 3169 self._parse_var(any_token=True) 3170 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3171 else None 3172 ) 3173 where = self._parse_where() 3174 3175 return self.expression( 3176 exp.IndexParameters, 3177 using=using, 3178 columns=columns, 3179 include=include, 3180 partition_by=partition_by, 3181 where=where, 3182 with_storage=with_storage, 3183 tablespace=tablespace, 3184 ) 3185 3186 def _parse_index( 3187 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3188 ) -> t.Optional[exp.Index]: 3189 if index or anonymous: 3190 unique = None 3191 primary = None 3192 amp = None 3193 3194 self._match(TokenType.ON) 3195 self._match(TokenType.TABLE) # hive 3196 table = self._parse_table_parts(schema=True) 3197 else: 3198 unique = self._match(TokenType.UNIQUE) 3199 primary = self._match_text_seq("PRIMARY") 3200 amp = self._match_text_seq("AMP") 3201 3202 if not self._match(TokenType.INDEX): 3203 return None 3204 3205 index = self._parse_id_var() 3206 table = None 3207 3208 params = self._parse_index_params() 3209 3210 return self.expression( 3211 exp.Index, 3212 this=index, 3213 table=table, 3214 unique=unique, 3215 primary=primary, 3216 amp=amp, 3217 params=params, 3218 ) 3219 3220 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3221 hints: t.List[exp.Expression] = [] 3222 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3223 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3224 hints.append( 3225 self.expression( 3226 exp.WithTableHint, 3227 expressions=self._parse_csv( 3228 lambda: self._parse_function() or self._parse_var(any_token=True) 3229 ), 3230 ) 3231 ) 3232 self._match_r_paren() 3233 else: 3234 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3235 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3236 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3237 3238 self._match_texts(("INDEX", "KEY")) 3239 if self._match(TokenType.FOR): 3240 hint.set("target", self._advance_any() and self._prev.text.upper()) 3241 3242 hint.set("expressions", self._parse_wrapped_id_vars()) 3243 hints.append(hint) 3244 3245 return hints or None 3246 3247 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3248 return ( 3249 (not schema and self._parse_function(optional_parens=False)) 3250 or self._parse_id_var(any_token=False) 3251 or self._parse_string_as_identifier() 3252 or self._parse_placeholder() 3253 ) 3254 3255 def _parse_table_parts( 3256 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3257 ) -> exp.Table: 3258 catalog = None 3259 db = None 3260 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3261 3262 while self._match(TokenType.DOT): 3263 if catalog: 3264 # This allows nesting the table in arbitrarily many dot expressions if needed 3265 table = self.expression( 3266 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3267 ) 3268 else: 3269 catalog = db 3270 db = table 3271 # "" used for tsql FROM a..b case 3272 table = self._parse_table_part(schema=schema) or "" 3273 3274 if ( 3275 wildcard 3276 and self._is_connected() 3277 and (isinstance(table, exp.Identifier) or not table) 3278 and self._match(TokenType.STAR) 3279 ): 3280 if isinstance(table, exp.Identifier): 3281 table.args["this"] += "*" 3282 else: 3283 table = exp.Identifier(this="*") 3284 3285 # We bubble up comments 
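# Example (illustrative sketch): _parse_table_hints below covers T-SQL
# WITH (...) table hints (see the linked docs). Assuming the T-SQL dialect,
# a parse might look like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> e = sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")
#     >>> table = e.args["from"].this
#     >>> assert isinstance(table.args["hints"][0], exp.WithTableHint)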
from the Identifier to the Table 3286 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3287 3288 if is_db_reference: 3289 catalog = db 3290 db = table 3291 table = None 3292 3293 if not table and not is_db_reference: 3294 self.raise_error(f"Expected table name but got {self._curr}") 3295 if not db and is_db_reference: 3296 self.raise_error(f"Expected database name but got {self._curr}") 3297 3298 return self.expression( 3299 exp.Table, 3300 comments=comments, 3301 this=table, 3302 db=db, 3303 catalog=catalog, 3304 pivots=self._parse_pivots(), 3305 ) 3306 3307 def _parse_table( 3308 self, 3309 schema: bool = False, 3310 joins: bool = False, 3311 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3312 parse_bracket: bool = False, 3313 is_db_reference: bool = False, 3314 parse_partition: bool = False, 3315 ) -> t.Optional[exp.Expression]: 3316 lateral = self._parse_lateral() 3317 if lateral: 3318 return lateral 3319 3320 unnest = self._parse_unnest() 3321 if unnest: 3322 return unnest 3323 3324 values = self._parse_derived_table_values() 3325 if values: 3326 return values 3327 3328 subquery = self._parse_select(table=True) 3329 if subquery: 3330 if not subquery.args.get("pivots"): 3331 subquery.set("pivots", self._parse_pivots()) 3332 return subquery 3333 3334 bracket = parse_bracket and self._parse_bracket(None) 3335 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3336 3337 only = self._match(TokenType.ONLY) 3338 3339 this = t.cast( 3340 exp.Expression, 3341 bracket 3342 or self._parse_bracket( 3343 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3344 ), 3345 ) 3346 3347 if only: 3348 this.set("only", only) 3349 3350 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3351 self._match_text_seq("*") 3352 3353 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3354 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3355 this.set("partition", self._parse_partition()) 3356 3357 if schema: 3358 return self._parse_schema(this=this) 3359 3360 version = self._parse_version() 3361 3362 if version: 3363 this.set("version", version) 3364 3365 if self.dialect.ALIAS_POST_TABLESAMPLE: 3366 table_sample = self._parse_table_sample() 3367 3368 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3369 if alias: 3370 this.set("alias", alias) 3371 3372 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3373 return self.expression( 3374 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3375 ) 3376 3377 this.set("hints", self._parse_table_hints()) 3378 3379 if not this.args.get("pivots"): 3380 this.set("pivots", self._parse_pivots()) 3381 3382 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3383 table_sample = self._parse_table_sample() 3384 3385 if table_sample: 3386 table_sample.set("this", this) 3387 this = table_sample 3388 3389 if joins: 3390 for join in self._parse_joins(): 3391 this.append("joins", join) 3392 3393 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3394 this.set("ordinality", True) 3395 this.set("alias", self._parse_table_alias()) 3396 3397 return this 3398 3399 def _parse_version(self) -> t.Optional[exp.Version]: 3400 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3401 this = "TIMESTAMP" 3402 elif self._match(TokenType.VERSION_SNAPSHOT): 3403 this = "VERSION" 3404 else: 3405 return None 3406 3407 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 
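# Example (illustrative sketch): _parse_version below handles time-travel
# clauses such as FOR SYSTEM_TIME AS OF. Assuming BigQuery's syntax is
# supported, a parse might look like this; the AST shape is an assumption.
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "SELECT * FROM t FOR SYSTEM_TIME AS OF '2024-01-01'"
#     >>> e = sqlglot.parse_one(sql, read="bigquery")
#     >>> assert isinstance(e.args["from"].this.args["version"], exp.Version)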
3408 kind = self._prev.text.upper() 3409 start = self._parse_bitwise() 3410 self._match_texts(("TO", "AND")) 3411 end = self._parse_bitwise() 3412 expression: t.Optional[exp.Expression] = self.expression( 3413 exp.Tuple, expressions=[start, end] 3414 ) 3415 elif self._match_text_seq("CONTAINED", "IN"): 3416 kind = "CONTAINED IN" 3417 expression = self.expression( 3418 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3419 ) 3420 elif self._match(TokenType.ALL): 3421 kind = "ALL" 3422 expression = None 3423 else: 3424 self._match_text_seq("AS", "OF") 3425 kind = "AS OF" 3426 expression = self._parse_type() 3427 3428 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3429 3430 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3431 if not self._match(TokenType.UNNEST): 3432 return None 3433 3434 expressions = self._parse_wrapped_csv(self._parse_equality) 3435 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3436 3437 alias = self._parse_table_alias() if with_alias else None 3438 3439 if alias: 3440 if self.dialect.UNNEST_COLUMN_ONLY: 3441 if alias.args.get("columns"): 3442 self.raise_error("Unexpected extra column alias in unnest.") 3443 3444 alias.set("columns", [alias.this]) 3445 alias.set("this", None) 3446 3447 columns = alias.args.get("columns") or [] 3448 if offset and len(expressions) < len(columns): 3449 offset = columns.pop() 3450 3451 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3452 self._match(TokenType.ALIAS) 3453 offset = self._parse_id_var( 3454 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3455 ) or exp.to_identifier("offset") 3456 3457 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3458 3459 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3460 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3461 if not is_derived and not self._match_text_seq("VALUES"): 3462 return None 3463 3464 expressions = self._parse_csv(self._parse_value) 3465 alias = self._parse_table_alias() 3466 3467 if is_derived: 3468 self._match_r_paren() 3469 3470 return self.expression( 3471 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3472 ) 3473 3474 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3475 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3476 as_modifier and self._match_text_seq("USING", "SAMPLE") 3477 ): 3478 return None 3479 3480 bucket_numerator = None 3481 bucket_denominator = None 3482 bucket_field = None 3483 percent = None 3484 size = None 3485 seed = None 3486 3487 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3488 matched_l_paren = self._match(TokenType.L_PAREN) 3489 3490 if self.TABLESAMPLE_CSV: 3491 num = None 3492 expressions = self._parse_csv(self._parse_primary) 3493 else: 3494 expressions = None 3495 num = ( 3496 self._parse_factor() 3497 if self._match(TokenType.NUMBER, advance=False) 3498 else self._parse_primary() or self._parse_placeholder() 3499 ) 3500 3501 if self._match_text_seq("BUCKET"): 3502 bucket_numerator = self._parse_number() 3503 self._match_text_seq("OUT", "OF") 3504 bucket_denominator = self._parse_number() 3505 self._match(TokenType.ON) 3506 bucket_field = self._parse_field() 3507 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3508 percent = num 3509 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3510 size =
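# Example (illustrative sketch): _parse_unnest above is shared by dialects such
# as Presto/Trino and BigQuery. Assuming Presto syntax, a parse might look
# like this:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "SELECT * FROM UNNEST(ARRAY[1, 2]) AS t(x)"
#     >>> e = sqlglot.parse_one(sql, read="presto")
#     >>> assert isinstance(e.args["from"].this, exp.Unnest)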
num 3511 else: 3512 percent = num 3513 3514 if matched_l_paren: 3515 self._match_r_paren() 3516 3517 if self._match(TokenType.L_PAREN): 3518 method = self._parse_var(upper=True) 3519 seed = self._match(TokenType.COMMA) and self._parse_number() 3520 self._match_r_paren() 3521 elif self._match_texts(("SEED", "REPEATABLE")): 3522 seed = self._parse_wrapped(self._parse_number) 3523 3524 if not method and self.DEFAULT_SAMPLING_METHOD: 3525 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3526 3527 return self.expression( 3528 exp.TableSample, 3529 expressions=expressions, 3530 method=method, 3531 bucket_numerator=bucket_numerator, 3532 bucket_denominator=bucket_denominator, 3533 bucket_field=bucket_field, 3534 percent=percent, 3535 size=size, 3536 seed=seed, 3537 ) 3538 3539 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3540 return list(iter(self._parse_pivot, None)) or None 3541 3542 def _parse_joins(self) -> t.Iterator[exp.Join]: 3543 return iter(self._parse_join, None) 3544 3545 # https://duckdb.org/docs/sql/statements/pivot 3546 def _parse_simplified_pivot(self) -> exp.Pivot: 3547 def _parse_on() -> t.Optional[exp.Expression]: 3548 this = self._parse_bitwise() 3549 return self._parse_in(this) if self._match(TokenType.IN) else this 3550 3551 this = self._parse_table() 3552 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3553 using = self._match(TokenType.USING) and self._parse_csv( 3554 lambda: self._parse_alias(self._parse_function()) 3555 ) 3556 group = self._parse_group() 3557 return self.expression( 3558 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3559 ) 3560 3561 def _parse_pivot_in(self) -> exp.In: 3562 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3563 this = self._parse_assignment() 3564 3565 self._match(TokenType.ALIAS) 3566 alias = self._parse_field() 3567 if alias: 3568 return self.expression(exp.PivotAlias, this=this, alias=alias) 3569 3570 return this 3571 3572 value = self._parse_column() 3573 3574 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3575 self.raise_error("Expecting IN (") 3576 3577 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3578 3579 self._match_r_paren() 3580 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3581 3582 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3583 index = self._index 3584 include_nulls = None 3585 3586 if self._match(TokenType.PIVOT): 3587 unpivot = False 3588 elif self._match(TokenType.UNPIVOT): 3589 unpivot = True 3590 3591 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3592 if self._match_text_seq("INCLUDE", "NULLS"): 3593 include_nulls = True 3594 elif self._match_text_seq("EXCLUDE", "NULLS"): 3595 include_nulls = False 3596 else: 3597 return None 3598 3599 expressions = [] 3600 3601 if not self._match(TokenType.L_PAREN): 3602 self._retreat(index) 3603 return None 3604 3605 if unpivot: 3606 expressions = self._parse_csv(self._parse_column) 3607 else: 3608 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3609 3610 if not expressions: 3611 self.raise_error("Failed to parse PIVOT's aggregation list") 3612 3613 if not self._match(TokenType.FOR): 3614 self.raise_error("Expecting FOR") 3615 3616 field = self._parse_pivot_in() 3617 3618 self._match_r_paren() 3619 3620 pivot = self.expression( 3621 exp.Pivot, 3622 expressions=expressions, 3623 field=field, 3624 unpivot=unpivot, 3625 include_nulls=include_nulls, 3626 ) 3627 3628 
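        # Illustrative note: a trailing table alias is only attached when another
        # PIVOT/UNPIVOT clause doesn't immediately follow, e.g.
        # FROM t PIVOT (SUM(v) FOR k IN ('a', 'b')) AS p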
if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3629 pivot.set("alias", self._parse_table_alias()) 3630 3631 if not unpivot: 3632 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3633 3634 columns: t.List[exp.Expression] = [] 3635 for fld in pivot.args["field"].expressions: 3636 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3637 for name in names: 3638 if self.PREFIXED_PIVOT_COLUMNS: 3639 name = f"{name}_{field_name}" if name else field_name 3640 else: 3641 name = f"{field_name}_{name}" if name else field_name 3642 3643 columns.append(exp.to_identifier(name)) 3644 3645 pivot.set("columns", columns) 3646 3647 return pivot 3648 3649 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3650 return [agg.alias for agg in aggregations] 3651 3652 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3653 if not skip_where_token and not self._match(TokenType.PREWHERE): 3654 return None 3655 3656 return self.expression( 3657 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3658 ) 3659 3660 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3661 if not skip_where_token and not self._match(TokenType.WHERE): 3662 return None 3663 3664 return self.expression( 3665 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3666 ) 3667 3668 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3669 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3670 return None 3671 3672 elements: t.Dict[str, t.Any] = defaultdict(list) 3673 3674 if self._match(TokenType.ALL): 3675 elements["all"] = True 3676 elif self._match(TokenType.DISTINCT): 3677 elements["all"] = False 3678 3679 while True: 3680 expressions = self._parse_csv( 3681 lambda: None 3682 if self._match(TokenType.ROLLUP, advance=False) 3683 else self._parse_assignment() 3684 ) 3685 if expressions: 3686 elements["expressions"].extend(expressions) 3687 3688 grouping_sets = self._parse_grouping_sets() 3689 if grouping_sets: 3690 elements["grouping_sets"].extend(grouping_sets) 3691 3692 rollup = None 3693 cube = None 3694 totals = None 3695 3696 index = self._index 3697 with_ = self._match(TokenType.WITH) 3698 if self._match(TokenType.ROLLUP): 3699 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3700 elements["rollup"].extend(ensure_list(rollup)) 3701 3702 if self._match(TokenType.CUBE): 3703 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3704 elements["cube"].extend(ensure_list(cube)) 3705 3706 if self._match_text_seq("TOTALS"): 3707 totals = True 3708 elements["totals"] = True # type: ignore 3709 3710 if not (grouping_sets or rollup or cube or totals): 3711 if with_: 3712 self._retreat(index) 3713 break 3714 3715 return self.expression(exp.Group, **elements) # type: ignore 3716 3717 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3718 if not self._match(TokenType.GROUPING_SETS): 3719 return None 3720 3721 return self._parse_wrapped_csv(self._parse_grouping_set) 3722 3723 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3724 if self._match(TokenType.L_PAREN): 3725 grouping_set = self._parse_csv(self._parse_column) 3726 self._match_r_paren() 3727 return self.expression(exp.Tuple, expressions=grouping_set) 3728 3729 return self._parse_column() 3730 3731 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 
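        # HAVING reuses the generic assignment-expression grammar, so e.g.
        # HAVING SUM(x) > 10 parses into exp.Having(this=exp.GT(...)) (illustrative).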
3732 if not skip_having_token and not self._match(TokenType.HAVING): 3733 return None 3734 return self.expression(exp.Having, this=self._parse_assignment()) 3735 3736 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3737 if not self._match(TokenType.QUALIFY): 3738 return None 3739 return self.expression(exp.Qualify, this=self._parse_assignment()) 3740 3741 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3742 if skip_start_token: 3743 start = None 3744 elif self._match(TokenType.START_WITH): 3745 start = self._parse_assignment() 3746 else: 3747 return None 3748 3749 self._match(TokenType.CONNECT_BY) 3750 nocycle = self._match_text_seq("NOCYCLE") 3751 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3752 exp.Prior, this=self._parse_bitwise() 3753 ) 3754 connect = self._parse_assignment() 3755 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3756 3757 if not start and self._match(TokenType.START_WITH): 3758 start = self._parse_assignment() 3759 3760 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3761 3762 def _parse_name_as_expression(self) -> exp.Alias: 3763 return self.expression( 3764 exp.Alias, 3765 alias=self._parse_id_var(any_token=True), 3766 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3767 ) 3768 3769 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3770 if self._match_text_seq("INTERPOLATE"): 3771 return self._parse_wrapped_csv(self._parse_name_as_expression) 3772 return None 3773 3774 def _parse_order( 3775 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3776 ) -> t.Optional[exp.Expression]: 3777 siblings = None 3778 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3779 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3780 return this 3781 3782 siblings = True 3783 3784 return self.expression( 3785 exp.Order, 3786 this=this, 3787 expressions=self._parse_csv(self._parse_ordered), 3788 interpolate=self._parse_interpolate(), 3789 siblings=siblings, 3790 ) 3791 3792 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3793 if not self._match(token): 3794 return None 3795 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3796 3797 def _parse_ordered( 3798 self, parse_method: t.Optional[t.Callable] = None 3799 ) -> t.Optional[exp.Ordered]: 3800 this = parse_method() if parse_method else self._parse_assignment() 3801 if not this: 3802 return None 3803 3804 asc = self._match(TokenType.ASC) 3805 desc = self._match(TokenType.DESC) or (asc and False) 3806 3807 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3808 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3809 3810 nulls_first = is_nulls_first or False 3811 explicitly_null_ordered = is_nulls_first or is_nulls_last 3812 3813 if ( 3814 not explicitly_null_ordered 3815 and ( 3816 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3817 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3818 ) 3819 and self.dialect.NULL_ORDERING != "nulls_are_last" 3820 ): 3821 nulls_first = True 3822 3823 if self._match_text_seq("WITH", "FILL"): 3824 with_fill = self.expression( 3825 exp.WithFill, 3826 **{ # type: ignore 3827 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3828 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3829 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3830 }, 3831 ) 3832 else: 3833 with_fill = None 3834 3835 return 
self.expression( 3836 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3837 ) 3838 3839 def _parse_limit( 3840 self, 3841 this: t.Optional[exp.Expression] = None, 3842 top: bool = False, 3843 skip_limit_token: bool = False, 3844 ) -> t.Optional[exp.Expression]: 3845 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3846 comments = self._prev_comments 3847 if top: 3848 limit_paren = self._match(TokenType.L_PAREN) 3849 expression = self._parse_term() if limit_paren else self._parse_number() 3850 3851 if limit_paren: 3852 self._match_r_paren() 3853 else: 3854 expression = self._parse_term() 3855 3856 if self._match(TokenType.COMMA): 3857 offset = expression 3858 expression = self._parse_term() 3859 else: 3860 offset = None 3861 3862 limit_exp = self.expression( 3863 exp.Limit, 3864 this=this, 3865 expression=expression, 3866 offset=offset, 3867 comments=comments, 3868 expressions=self._parse_limit_by(), 3869 ) 3870 3871 return limit_exp 3872 3873 if self._match(TokenType.FETCH): 3874 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3875 direction = self._prev.text.upper() if direction else "FIRST" 3876 3877 count = self._parse_field(tokens=self.FETCH_TOKENS) 3878 percent = self._match(TokenType.PERCENT) 3879 3880 self._match_set((TokenType.ROW, TokenType.ROWS)) 3881 3882 only = self._match_text_seq("ONLY") 3883 with_ties = self._match_text_seq("WITH", "TIES") 3884 3885 if only and with_ties: 3886 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3887 3888 return self.expression( 3889 exp.Fetch, 3890 direction=direction, 3891 count=count, 3892 percent=percent, 3893 with_ties=with_ties, 3894 ) 3895 3896 return this 3897 3898 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3899 if not self._match(TokenType.OFFSET): 3900 return this 3901 3902 count = self._parse_term() 3903 self._match_set((TokenType.ROW, TokenType.ROWS)) 3904 3905 return self.expression( 3906 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3907 ) 3908 3909 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3910 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3911 3912 def _parse_locks(self) -> t.List[exp.Lock]: 3913 locks = [] 3914 while True: 3915 if self._match_text_seq("FOR", "UPDATE"): 3916 update = True 3917 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3918 "LOCK", "IN", "SHARE", "MODE" 3919 ): 3920 update = False 3921 else: 3922 break 3923 3924 expressions = None 3925 if self._match_text_seq("OF"): 3926 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3927 3928 wait: t.Optional[bool | exp.Expression] = None 3929 if self._match_text_seq("NOWAIT"): 3930 wait = True 3931 elif self._match_text_seq("WAIT"): 3932 wait = self._parse_primary() 3933 elif self._match_text_seq("SKIP", "LOCKED"): 3934 wait = False 3935 3936 locks.append( 3937 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3938 ) 3939 3940 return locks 3941 3942 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3943 while this and self._match_set(self.SET_OPERATIONS): 3944 token_type = self._prev.token_type 3945 3946 if token_type == TokenType.UNION: 3947 operation = exp.Union 3948 elif token_type == TokenType.EXCEPT: 3949 operation = exp.Except 3950 else: 3951 operation = exp.Intersect 3952 3953 comments = self._prev.comments 3954 
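            # A bare set operator defaults to DISTINCT per SQL semantics, so e.g.
            # SELECT 1 UNION SELECT 2 yields distinct=True below (illustrative).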
distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3955 by_name = self._match_text_seq("BY", "NAME") 3956 expression = self._parse_select(nested=True, parse_set_operation=False) 3957 3958 this = self.expression( 3959 operation, 3960 comments=comments, 3961 this=this, 3962 distinct=distinct, 3963 by_name=by_name, 3964 expression=expression, 3965 ) 3966 3967 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3968 expression = this.expression 3969 3970 if expression: 3971 for arg in self.UNION_MODIFIERS: 3972 expr = expression.args.get(arg) 3973 if expr: 3974 this.set(arg, expr.pop()) 3975 3976 return this 3977 3978 def _parse_expression(self) -> t.Optional[exp.Expression]: 3979 return self._parse_alias(self._parse_assignment()) 3980 3981 def _parse_assignment(self) -> t.Optional[exp.Expression]: 3982 this = self._parse_disjunction() 3983 3984 while self._match_set(self.ASSIGNMENT): 3985 this = self.expression( 3986 self.ASSIGNMENT[self._prev.token_type], 3987 this=this, 3988 comments=self._prev_comments, 3989 expression=self._parse_assignment(), 3990 ) 3991 3992 return this 3993 3994 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 3995 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 3996 3997 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3998 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3999 4000 def _parse_equality(self) -> t.Optional[exp.Expression]: 4001 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4002 4003 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4004 return self._parse_tokens(self._parse_range, self.COMPARISON) 4005 4006 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4007 this = this or self._parse_bitwise() 4008 negate = self._match(TokenType.NOT) 4009 4010 if self._match_set(self.RANGE_PARSERS): 4011 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4012 if not expression: 4013 return this 4014 4015 this = expression 4016 elif self._match(TokenType.ISNULL): 4017 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4018 4019 # Postgres supports ISNULL and NOTNULL for conditions. 
4020 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4021 if self._match(TokenType.NOTNULL): 4022 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4023 this = self.expression(exp.Not, this=this) 4024 4025 if negate: 4026 this = self.expression(exp.Not, this=this) 4027 4028 if self._match(TokenType.IS): 4029 this = self._parse_is(this) 4030 4031 return this 4032 4033 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4034 index = self._index - 1 4035 negate = self._match(TokenType.NOT) 4036 4037 if self._match_text_seq("DISTINCT", "FROM"): 4038 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4039 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4040 4041 expression = self._parse_null() or self._parse_boolean() 4042 if not expression: 4043 self._retreat(index) 4044 return None 4045 4046 this = self.expression(exp.Is, this=this, expression=expression) 4047 return self.expression(exp.Not, this=this) if negate else this 4048 4049 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4050 unnest = self._parse_unnest(with_alias=False) 4051 if unnest: 4052 this = self.expression(exp.In, this=this, unnest=unnest) 4053 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4054 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4055 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4056 4057 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4058 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4059 else: 4060 this = self.expression(exp.In, this=this, expressions=expressions) 4061 4062 if matched_l_paren: 4063 self._match_r_paren(this) 4064 elif not self._match(TokenType.R_BRACKET, expression=this): 4065 self.raise_error("Expecting ]") 4066 else: 4067 this = self.expression(exp.In, this=this, field=self._parse_field()) 4068 4069 return this 4070 4071 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4072 low = self._parse_bitwise() 4073 self._match(TokenType.AND) 4074 high = self._parse_bitwise() 4075 return self.expression(exp.Between, this=this, low=low, high=high) 4076 4077 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4078 if not self._match(TokenType.ESCAPE): 4079 return this 4080 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4081 4082 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4083 index = self._index 4084 4085 if not self._match(TokenType.INTERVAL) and match_interval: 4086 return None 4087 4088 if self._match(TokenType.STRING, advance=False): 4089 this = self._parse_primary() 4090 else: 4091 this = self._parse_term() 4092 4093 if not this or ( 4094 isinstance(this, exp.Column) 4095 and not this.table 4096 and not this.this.quoted 4097 and this.name.upper() == "IS" 4098 ): 4099 self._retreat(index) 4100 return None 4101 4102 unit = self._parse_function() or ( 4103 not self._match(TokenType.ALIAS, advance=False) 4104 and self._parse_var(any_token=True, upper=True) 4105 ) 4106 4107 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4108 # each INTERVAL expression into this canonical form so it's easy to transpile 4109 if this and this.is_number: 4110 this = exp.Literal.string(this.name) 4111 elif this and this.is_string: 4112 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4113 if 
len(parts) == 1: 4114 if unit: 4115 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4116 self._retreat(self._index - 1) 4117 4118 this = exp.Literal.string(parts[0][0]) 4119 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4120 4121 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4122 unit = self.expression( 4123 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4124 ) 4125 4126 interval = self.expression(exp.Interval, this=this, unit=unit) 4127 4128 index = self._index 4129 self._match(TokenType.PLUS) 4130 4131 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4132 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4133 return self.expression( 4134 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4135 ) 4136 4137 self._retreat(index) 4138 return interval 4139 4140 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4141 this = self._parse_term() 4142 4143 while True: 4144 if self._match_set(self.BITWISE): 4145 this = self.expression( 4146 self.BITWISE[self._prev.token_type], 4147 this=this, 4148 expression=self._parse_term(), 4149 ) 4150 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4151 this = self.expression( 4152 exp.DPipe, 4153 this=this, 4154 expression=self._parse_term(), 4155 safe=not self.dialect.STRICT_STRING_CONCAT, 4156 ) 4157 elif self._match(TokenType.DQMARK): 4158 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4159 elif self._match_pair(TokenType.LT, TokenType.LT): 4160 this = self.expression( 4161 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4162 ) 4163 elif self._match_pair(TokenType.GT, TokenType.GT): 4164 this = self.expression( 4165 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4166 ) 4167 else: 4168 break 4169 4170 return this 4171 4172 def _parse_term(self) -> t.Optional[exp.Expression]: 4173 return self._parse_tokens(self._parse_factor, self.TERM) 4174 4175 def _parse_factor(self) -> t.Optional[exp.Expression]: 4176 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4177 this = parse_method() 4178 4179 while self._match_set(self.FACTOR): 4180 klass = self.FACTOR[self._prev.token_type] 4181 comments = self._prev_comments 4182 expression = parse_method() 4183 4184 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4185 self._retreat(self._index - 1) 4186 return this 4187 4188 this = self.expression(klass, this=this, comments=comments, expression=expression) 4189 4190 if isinstance(this, exp.Div): 4191 this.args["typed"] = self.dialect.TYPED_DIVISION 4192 this.args["safe"] = self.dialect.SAFE_DIVISION 4193 4194 return this 4195 4196 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4197 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4198 4199 def _parse_unary(self) -> t.Optional[exp.Expression]: 4200 if self._match_set(self.UNARY_PARSERS): 4201 return self.UNARY_PARSERS[self._prev.token_type](self) 4202 return self._parse_at_time_zone(self._parse_type()) 4203 4204 def _parse_type( 4205 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4206 ) -> t.Optional[exp.Expression]: 4207 interval = parse_interval and self._parse_interval() 4208 if interval: 4209 return interval 4210 4211 index = self._index 4212 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4213 4214 if data_type: 4215 index2 = 
self._index 4216 this = self._parse_primary() 4217 4218 if isinstance(this, exp.Literal): 4219 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4220 if parser: 4221 return parser(self, this, data_type) 4222 4223 return self.expression(exp.Cast, this=this, to=data_type) 4224 4225 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4226 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4227 # 4228 # If the index difference here is greater than 1, that means the parser itself must have 4229 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4230 # 4231 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4232 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4233 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4234 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4235 # 4236 # In these cases, we don't really want to return the converted type, but instead retreat 4237 # and try to parse a Column or Identifier in the section below. 4238 if data_type.expressions and index2 - index > 1: 4239 self._retreat(index2) 4240 return self._parse_column_ops(data_type) 4241 4242 self._retreat(index) 4243 4244 if fallback_to_identifier: 4245 return self._parse_id_var() 4246 4247 this = self._parse_column() 4248 return this and self._parse_column_ops(this) 4249 4250 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4251 this = self._parse_type() 4252 if not this: 4253 return None 4254 4255 if isinstance(this, exp.Column) and not this.table: 4256 this = exp.var(this.name.upper()) 4257 4258 return self.expression( 4259 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4260 ) 4261 4262 def _parse_types( 4263 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4264 ) -> t.Optional[exp.Expression]: 4265 index = self._index 4266 4267 this: t.Optional[exp.Expression] = None 4268 prefix = self._match_text_seq("SYSUDTLIB", ".") 4269 4270 if not self._match_set(self.TYPE_TOKENS): 4271 identifier = allow_identifiers and self._parse_id_var( 4272 any_token=False, tokens=(TokenType.VAR,) 4273 ) 4274 if identifier: 4275 tokens = self.dialect.tokenize(identifier.name) 4276 4277 if len(tokens) != 1: 4278 self.raise_error("Unexpected identifier", self._prev) 4279 4280 if tokens[0].token_type in self.TYPE_TOKENS: 4281 self._prev = tokens[0] 4282 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4283 type_name = identifier.name 4284 4285 while self._match(TokenType.DOT): 4286 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4287 4288 this = exp.DataType.build(type_name, udt=True) 4289 else: 4290 self._retreat(self._index - 1) 4291 return None 4292 else: 4293 return None 4294 4295 type_token = self._prev.token_type 4296 4297 if type_token == TokenType.PSEUDO_TYPE: 4298 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4299 4300 if type_token == TokenType.OBJECT_IDENTIFIER: 4301 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4302 4303 nested = type_token in self.NESTED_TYPE_TOKENS 4304 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4305 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4306 expressions = None 4307 maybe_func = False 4308 4309 if self._match(TokenType.L_PAREN): 4310 if is_struct: 4311 expressions = self._parse_csv(lambda: 
self._parse_struct_types(type_required=True)) 4312 elif nested: 4313 expressions = self._parse_csv( 4314 lambda: self._parse_types( 4315 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4316 ) 4317 ) 4318 elif type_token in self.ENUM_TYPE_TOKENS: 4319 expressions = self._parse_csv(self._parse_equality) 4320 elif is_aggregate: 4321 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4322 any_token=False, tokens=(TokenType.VAR,) 4323 ) 4324 if not func_or_ident or not self._match(TokenType.COMMA): 4325 return None 4326 expressions = self._parse_csv( 4327 lambda: self._parse_types( 4328 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4329 ) 4330 ) 4331 expressions.insert(0, func_or_ident) 4332 else: 4333 expressions = self._parse_csv(self._parse_type_size) 4334 4335 if not expressions or not self._match(TokenType.R_PAREN): 4336 self._retreat(index) 4337 return None 4338 4339 maybe_func = True 4340 4341 values: t.Optional[t.List[exp.Expression]] = None 4342 4343 if nested and self._match(TokenType.LT): 4344 if is_struct: 4345 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4346 else: 4347 expressions = self._parse_csv( 4348 lambda: self._parse_types( 4349 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4350 ) 4351 ) 4352 4353 if not self._match(TokenType.GT): 4354 self.raise_error("Expecting >") 4355 4356 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4357 values = self._parse_csv(self._parse_assignment) 4358 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4359 4360 if type_token in self.TIMESTAMPS: 4361 if self._match_text_seq("WITH", "TIME", "ZONE"): 4362 maybe_func = False 4363 tz_type = ( 4364 exp.DataType.Type.TIMETZ 4365 if type_token in self.TIMES 4366 else exp.DataType.Type.TIMESTAMPTZ 4367 ) 4368 this = exp.DataType(this=tz_type, expressions=expressions) 4369 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4370 maybe_func = False 4371 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4372 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4373 maybe_func = False 4374 elif type_token == TokenType.INTERVAL: 4375 unit = self._parse_var(upper=True) 4376 if unit: 4377 if self._match_text_seq("TO"): 4378 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4379 4380 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4381 else: 4382 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4383 4384 if maybe_func and check_func: 4385 index2 = self._index 4386 peek = self._parse_string() 4387 4388 if not peek: 4389 self._retreat(index) 4390 return None 4391 4392 self._retreat(index2) 4393 4394 if not this: 4395 if self._match_text_seq("UNSIGNED"): 4396 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4397 if not unsigned_type_token: 4398 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4399 4400 type_token = unsigned_type_token or type_token 4401 4402 this = exp.DataType( 4403 this=exp.DataType.Type[type_token.value], 4404 expressions=expressions, 4405 nested=nested, 4406 values=values, 4407 prefix=prefix, 4408 ) 4409 elif expressions: 4410 this.set("expressions", expressions) 4411 4412 index = self._index 4413 4414 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4415 matched_array = self._match(TokenType.ARRAY) 4416 4417 while self._curr: 4418 
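            # Each iteration wraps the current type in one more ARRAY layer, so e.g.
            # INT[3][4] nests as ARRAY(ARRAY(INT, values=[3]), values=[4]) (illustrative).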
matched_l_bracket = self._match(TokenType.L_BRACKET) 4419 if not matched_l_bracket and not matched_array: 4420 break 4421 4422 matched_array = False 4423 values = self._parse_csv(self._parse_assignment) or None 4424 if values and not schema: 4425 self._retreat(index) 4426 break 4427 4428 this = exp.DataType( 4429 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4430 ) 4431 self._match(TokenType.R_BRACKET) 4432 4433 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4434 converter = self.TYPE_CONVERTER.get(this.this) 4435 if converter: 4436 this = converter(t.cast(exp.DataType, this)) 4437 4438 return this 4439 4440 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4441 index = self._index 4442 this = ( 4443 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4444 or self._parse_id_var() 4445 ) 4446 self._match(TokenType.COLON) 4447 4448 if ( 4449 type_required 4450 and not isinstance(this, exp.DataType) 4451 and not self._match_set(self.TYPE_TOKENS, advance=False) 4452 ): 4453 self._retreat(index) 4454 return self._parse_types() 4455 4456 return self._parse_column_def(this) 4457 4458 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4459 if not self._match_text_seq("AT", "TIME", "ZONE"): 4460 return this 4461 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4462 4463 def _parse_column(self) -> t.Optional[exp.Expression]: 4464 this = self._parse_column_reference() 4465 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4466 4467 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4468 this = self._parse_field() 4469 if ( 4470 not this 4471 and self._match(TokenType.VALUES, advance=False) 4472 and self.VALUES_FOLLOWED_BY_PAREN 4473 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4474 ): 4475 this = self._parse_id_var() 4476 4477 if isinstance(this, exp.Identifier): 4478 # We bubble up comments from the Identifier to the Column 4479 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4480 4481 return this 4482 4483 def _parse_colon_as_json_extract( 4484 self, this: t.Optional[exp.Expression] 4485 ) -> t.Optional[exp.Expression]: 4486 casts = [] 4487 json_path = [] 4488 4489 while self._match(TokenType.COLON): 4490 start_index = self._index 4491 path = self._parse_column_ops(self._parse_field(any_token=True)) 4492 4493 # The cast :: operator has a lower precedence than the extraction operator :, so 4494 # we rearrange the AST appropriately to avoid casting the JSON path 4495 while isinstance(path, exp.Cast): 4496 casts.append(path.to) 4497 path = path.this 4498 4499 if casts: 4500 dcolon_offset = next( 4501 i 4502 for i, t in enumerate(self._tokens[start_index:]) 4503 if t.token_type == TokenType.DCOLON 4504 ) 4505 end_token = self._tokens[start_index + dcolon_offset - 1] 4506 else: 4507 end_token = self._prev 4508 4509 if path: 4510 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4511 4512 if json_path: 4513 this = self.expression( 4514 exp.JSONExtract, 4515 this=this, 4516 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4517 ) 4518 4519 while casts: 4520 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4521 4522 return this 4523 4524 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4525 this = self._parse_bracket(this) 4526 4527 while 
self._match_set(self.COLUMN_OPERATORS): 4528 op_token = self._prev.token_type 4529 op = self.COLUMN_OPERATORS.get(op_token) 4530 4531 if op_token == TokenType.DCOLON: 4532 field = self._parse_types() 4533 if not field: 4534 self.raise_error("Expected type") 4535 elif op and self._curr: 4536 field = self._parse_column_reference() 4537 else: 4538 field = self._parse_field(any_token=True, anonymous_func=True) 4539 4540 if isinstance(field, exp.Func) and this: 4541 # bigquery allows function calls like x.y.count(...) 4542 # SAFE.SUBSTR(...) 4543 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4544 this = exp.replace_tree( 4545 this, 4546 lambda n: ( 4547 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4548 if n.table 4549 else n.this 4550 ) 4551 if isinstance(n, exp.Column) 4552 else n, 4553 ) 4554 4555 if op: 4556 this = op(self, this, field) 4557 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4558 this = self.expression( 4559 exp.Column, 4560 this=field, 4561 table=this.this, 4562 db=this.args.get("table"), 4563 catalog=this.args.get("db"), 4564 ) 4565 else: 4566 this = self.expression(exp.Dot, this=this, expression=field) 4567 4568 this = self._parse_bracket(this) 4569 4570 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4571 4572 def _parse_primary(self) -> t.Optional[exp.Expression]: 4573 if self._match_set(self.PRIMARY_PARSERS): 4574 token_type = self._prev.token_type 4575 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4576 4577 if token_type == TokenType.STRING: 4578 expressions = [primary] 4579 while self._match(TokenType.STRING): 4580 expressions.append(exp.Literal.string(self._prev.text)) 4581 4582 if len(expressions) > 1: 4583 return self.expression(exp.Concat, expressions=expressions) 4584 4585 return primary 4586 4587 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4588 return exp.Literal.number(f"0.{self._prev.text}") 4589 4590 if self._match(TokenType.L_PAREN): 4591 comments = self._prev_comments 4592 query = self._parse_select() 4593 4594 if query: 4595 expressions = [query] 4596 else: 4597 expressions = self._parse_expressions() 4598 4599 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4600 4601 if not this and self._match(TokenType.R_PAREN, advance=False): 4602 this = self.expression(exp.Tuple) 4603 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4604 this = self._parse_subquery(this=this, parse_alias=False) 4605 elif isinstance(this, exp.Subquery): 4606 this = self._parse_subquery( 4607 this=self._parse_set_operations(this), parse_alias=False 4608 ) 4609 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4610 this = self.expression(exp.Tuple, expressions=expressions) 4611 else: 4612 this = self.expression(exp.Paren, this=this) 4613 4614 if this: 4615 this.add_comments(comments) 4616 4617 self._match_r_paren(expression=this) 4618 return this 4619 4620 return None 4621 4622 def _parse_field( 4623 self, 4624 any_token: bool = False, 4625 tokens: t.Optional[t.Collection[TokenType]] = None, 4626 anonymous_func: bool = False, 4627 ) -> t.Optional[exp.Expression]: 4628 if anonymous_func: 4629 field = ( 4630 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4631 or self._parse_primary() 4632 ) 4633 else: 4634 field = self._parse_primary() or self._parse_function( 4635 anonymous=anonymous_func, any_token=any_token 4636 ) 4637 return field or self._parse_id_var(any_token=any_token, 
tokens=tokens) 4638 4639 def _parse_function( 4640 self, 4641 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4642 anonymous: bool = False, 4643 optional_parens: bool = True, 4644 any_token: bool = False, 4645 ) -> t.Optional[exp.Expression]: 4646 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4647 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4648 fn_syntax = False 4649 if ( 4650 self._match(TokenType.L_BRACE, advance=False) 4651 and self._next 4652 and self._next.text.upper() == "FN" 4653 ): 4654 self._advance(2) 4655 fn_syntax = True 4656 4657 func = self._parse_function_call( 4658 functions=functions, 4659 anonymous=anonymous, 4660 optional_parens=optional_parens, 4661 any_token=any_token, 4662 ) 4663 4664 if fn_syntax: 4665 self._match(TokenType.R_BRACE) 4666 4667 return func 4668 4669 def _parse_function_call( 4670 self, 4671 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4672 anonymous: bool = False, 4673 optional_parens: bool = True, 4674 any_token: bool = False, 4675 ) -> t.Optional[exp.Expression]: 4676 if not self._curr: 4677 return None 4678 4679 comments = self._curr.comments 4680 token_type = self._curr.token_type 4681 this = self._curr.text 4682 upper = this.upper() 4683 4684 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4685 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4686 self._advance() 4687 return self._parse_window(parser(self)) 4688 4689 if not self._next or self._next.token_type != TokenType.L_PAREN: 4690 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4691 self._advance() 4692 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4693 4694 return None 4695 4696 if any_token: 4697 if token_type in self.RESERVED_TOKENS: 4698 return None 4699 elif token_type not in self.FUNC_TOKENS: 4700 return None 4701 4702 self._advance(2) 4703 4704 parser = self.FUNCTION_PARSERS.get(upper) 4705 if parser and not anonymous: 4706 this = parser(self) 4707 else: 4708 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4709 4710 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4711 this = self.expression(subquery_predicate, this=self._parse_select()) 4712 self._match_r_paren() 4713 return this 4714 4715 if functions is None: 4716 functions = self.FUNCTIONS 4717 4718 function = functions.get(upper) 4719 4720 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4721 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4722 4723 if alias: 4724 args = self._kv_to_prop_eq(args) 4725 4726 if function and not anonymous: 4727 if "dialect" in function.__code__.co_varnames: 4728 func = function(args, dialect=self.dialect) 4729 else: 4730 func = function(args) 4731 4732 func = self.validate_expression(func, args) 4733 if not self.dialect.NORMALIZE_FUNCTIONS: 4734 func.meta["name"] = this 4735 4736 this = func 4737 else: 4738 if token_type == TokenType.IDENTIFIER: 4739 this = exp.Identifier(this=this, quoted=True) 4740 this = self.expression(exp.Anonymous, this=this, expressions=args) 4741 4742 if isinstance(this, exp.Expression): 4743 this.add_comments(comments) 4744 4745 self._match_r_paren(this) 4746 return self._parse_window(this) 4747 4748 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4749 transformed = [] 4750 4751 for e in expressions: 4752 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4753 if isinstance(e, exp.Alias): 4754 e = 
self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4755 4756 if not isinstance(e, exp.PropertyEQ): 4757 e = self.expression( 4758 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4759 ) 4760 4761 if isinstance(e.this, exp.Column): 4762 e.this.replace(e.this.this) 4763 4764 transformed.append(e) 4765 4766 return transformed 4767 4768 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4769 return self._parse_column_def(self._parse_id_var()) 4770 4771 def _parse_user_defined_function( 4772 self, kind: t.Optional[TokenType] = None 4773 ) -> t.Optional[exp.Expression]: 4774 this = self._parse_id_var() 4775 4776 while self._match(TokenType.DOT): 4777 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4778 4779 if not self._match(TokenType.L_PAREN): 4780 return this 4781 4782 expressions = self._parse_csv(self._parse_function_parameter) 4783 self._match_r_paren() 4784 return self.expression( 4785 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4786 ) 4787 4788 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4789 literal = self._parse_primary() 4790 if literal: 4791 return self.expression(exp.Introducer, this=token.text, expression=literal) 4792 4793 return self.expression(exp.Identifier, this=token.text) 4794 4795 def _parse_session_parameter(self) -> exp.SessionParameter: 4796 kind = None 4797 this = self._parse_id_var() or self._parse_primary() 4798 4799 if this and self._match(TokenType.DOT): 4800 kind = this.name 4801 this = self._parse_var() or self._parse_primary() 4802 4803 return self.expression(exp.SessionParameter, this=this, kind=kind) 4804 4805 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4806 return self._parse_id_var() 4807 4808 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4809 index = self._index 4810 4811 if self._match(TokenType.L_PAREN): 4812 expressions = t.cast( 4813 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4814 ) 4815 4816 if not self._match(TokenType.R_PAREN): 4817 self._retreat(index) 4818 else: 4819 expressions = [self._parse_lambda_arg()] 4820 4821 if self._match_set(self.LAMBDAS): 4822 return self.LAMBDAS[self._prev.token_type](self, expressions) 4823 4824 self._retreat(index) 4825 4826 this: t.Optional[exp.Expression] 4827 4828 if self._match(TokenType.DISTINCT): 4829 this = self.expression( 4830 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4831 ) 4832 else: 4833 this = self._parse_select_or_expression(alias=alias) 4834 4835 return self._parse_limit( 4836 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4837 ) 4838 4839 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4840 index = self._index 4841 if not self._match(TokenType.L_PAREN): 4842 return this 4843 4844 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4845 # expr can be of both types 4846 if self._match_set(self.SELECT_START_TOKENS): 4847 self._retreat(index) 4848 return this 4849 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4850 self._match_r_paren() 4851 return self.expression(exp.Schema, this=this, expressions=args) 4852 4853 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4854 return self._parse_column_def(self._parse_field(any_token=True)) 4855 4856 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4857 # column defs are not really columns, they're identifiers 4858 if isinstance(this, exp.Column): 4859 this = this.this 4860 4861 kind = self._parse_types(schema=True) 4862 4863 if self._match_text_seq("FOR", "ORDINALITY"): 4864 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4865 4866 constraints: t.List[exp.Expression] = [] 4867 4868 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4869 ("ALIAS", "MATERIALIZED") 4870 ): 4871 persisted = self._prev.text.upper() == "MATERIALIZED" 4872 constraints.append( 4873 self.expression( 4874 exp.ComputedColumnConstraint, 4875 this=self._parse_assignment(), 4876 persisted=persisted or self._match_text_seq("PERSISTED"), 4877 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4878 ) 4879 ) 4880 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4881 self._match(TokenType.ALIAS) 4882 constraints.append( 4883 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4884 ) 4885 4886 while True: 4887 constraint = self._parse_column_constraint() 4888 if not constraint: 4889 break 4890 constraints.append(constraint) 4891 4892 if not kind and not constraints: 4893 return this 4894 4895 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4896 4897 def _parse_auto_increment( 4898 self, 4899 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4900 start = None 4901 increment = None 4902 4903 if self._match(TokenType.L_PAREN, advance=False): 4904 args = self._parse_wrapped_csv(self._parse_bitwise) 4905 start = seq_get(args, 0) 4906 increment = seq_get(args, 1) 4907 elif self._match_text_seq("START"): 4908 start = self._parse_bitwise() 4909 self._match_text_seq("INCREMENT") 4910 increment = self._parse_bitwise() 4911 4912 if start and increment: 4913 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4914 4915 return exp.AutoIncrementColumnConstraint() 4916 4917 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4918 if not self._match_text_seq("REFRESH"): 4919 self._retreat(self._index - 1) 4920 return None 4921 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4922 4923 def _parse_compress(self) -> exp.CompressColumnConstraint: 4924 if self._match(TokenType.L_PAREN, advance=False): 4925 return self.expression( 4926 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4927 ) 4928 4929 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4930 4931 def _parse_generated_as_identity( 4932 self, 4933 ) -> ( 4934 exp.GeneratedAsIdentityColumnConstraint 4935 | exp.ComputedColumnConstraint 4936 | exp.GeneratedAsRowColumnConstraint 4937 ): 4938 if self._match_text_seq("BY", "DEFAULT"): 4939 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4940 this = self.expression( 4941 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4942 ) 4943 else: 4944 self._match_text_seq("ALWAYS") 4945 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4946 4947 self._match(TokenType.ALIAS) 4948 4949 if self._match_text_seq("ROW"): 4950 start = self._match_text_seq("START") 4951 if not start: 4952 self._match(TokenType.END) 4953 hidden = self._match_text_seq("HIDDEN") 4954 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4955 4956 identity = self._match_text_seq("IDENTITY") 4957 4958 if self._match(TokenType.L_PAREN): 4959 if self._match(TokenType.START_WITH): 4960 this.set("start", self._parse_bitwise()) 4961 if self._match_text_seq("INCREMENT", "BY"): 4962 this.set("increment", self._parse_bitwise()) 4963 if self._match_text_seq("MINVALUE"): 4964 this.set("minvalue", self._parse_bitwise()) 4965 if self._match_text_seq("MAXVALUE"): 4966 this.set("maxvalue", self._parse_bitwise()) 4967 4968 if self._match_text_seq("CYCLE"): 4969 this.set("cycle", True) 4970 elif self._match_text_seq("NO", "CYCLE"): 4971 this.set("cycle", False) 4972 4973 if not identity: 4974 this.set("expression", self._parse_range()) 4975 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4976 args = self._parse_csv(self._parse_bitwise) 4977 this.set("start", seq_get(args, 0)) 4978 this.set("increment", seq_get(args, 1)) 4979 4980 self._match_r_paren() 4981 4982 return this 4983 4984 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4985 self._match_text_seq("LENGTH") 4986 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4987 4988 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4989 if self._match_text_seq("NULL"): 4990 return self.expression(exp.NotNullColumnConstraint) 4991 if self._match_text_seq("CASESPECIFIC"): 4992 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4993 if self._match_text_seq("FOR", "REPLICATION"): 4994 return self.expression(exp.NotForReplicationColumnConstraint) 4995 return None 4996 4997 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4998 if self._match(TokenType.CONSTRAINT): 4999 this = self._parse_id_var() 5000 else: 5001 this = None 5002 5003 if self._match_texts(self.CONSTRAINT_PARSERS): 5004 return self.expression( 5005 exp.ColumnConstraint, 5006 this=this, 5007 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5008 ) 5009 5010 return this 5011 5012 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5013 if not self._match(TokenType.CONSTRAINT): 5014 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5015 5016 return self.expression( 5017 exp.Constraint, 5018 this=self._parse_id_var(), 5019 expressions=self._parse_unnamed_constraints(), 5020 ) 5021 5022 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5023 constraints = [] 5024 while True: 5025 constraint = self._parse_unnamed_constraint() or self._parse_function() 5026 if not constraint: 5027 break 5028 constraints.append(constraint) 5029 5030 return constraints 5031 5032 def _parse_unnamed_constraint( 5033 self, constraints: t.Optional[t.Collection[str]] = None 5034 ) -> t.Optional[exp.Expression]: 5035 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5036 constraints or self.CONSTRAINT_PARSERS 5037 ): 5038 return None 5039 5040 constraint = self._prev.text.upper() 5041 if constraint not in self.CONSTRAINT_PARSERS: 5042 
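            # Reached when a keyword passed via `constraints` has no registered
            # parser in CONSTRAINT_PARSERS (illustrative note).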
self.raise_error(f"No parser found for schema constraint {constraint}.") 5043 5044 return self.CONSTRAINT_PARSERS[constraint](self) 5045 5046 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5047 self._match_text_seq("KEY") 5048 return self.expression( 5049 exp.UniqueColumnConstraint, 5050 this=self._parse_schema(self._parse_id_var(any_token=False)), 5051 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5052 on_conflict=self._parse_on_conflict(), 5053 ) 5054 5055 def _parse_key_constraint_options(self) -> t.List[str]: 5056 options = [] 5057 while True: 5058 if not self._curr: 5059 break 5060 5061 if self._match(TokenType.ON): 5062 action = None 5063 on = self._advance_any() and self._prev.text 5064 5065 if self._match_text_seq("NO", "ACTION"): 5066 action = "NO ACTION" 5067 elif self._match_text_seq("CASCADE"): 5068 action = "CASCADE" 5069 elif self._match_text_seq("RESTRICT"): 5070 action = "RESTRICT" 5071 elif self._match_pair(TokenType.SET, TokenType.NULL): 5072 action = "SET NULL" 5073 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5074 action = "SET DEFAULT" 5075 else: 5076 self.raise_error("Invalid key constraint") 5077 5078 options.append(f"ON {on} {action}") 5079 elif self._match_text_seq("NOT", "ENFORCED"): 5080 options.append("NOT ENFORCED") 5081 elif self._match_text_seq("DEFERRABLE"): 5082 options.append("DEFERRABLE") 5083 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5084 options.append("INITIALLY DEFERRED") 5085 elif self._match_text_seq("NORELY"): 5086 options.append("NORELY") 5087 elif self._match_text_seq("MATCH", "FULL"): 5088 options.append("MATCH FULL") 5089 else: 5090 break 5091 5092 return options 5093 5094 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5095 if match and not self._match(TokenType.REFERENCES): 5096 return None 5097 5098 expressions = None 5099 this = self._parse_table(schema=True) 5100 options = self._parse_key_constraint_options() 5101 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5102 5103 def _parse_foreign_key(self) -> exp.ForeignKey: 5104 expressions = self._parse_wrapped_id_vars() 5105 reference = self._parse_references() 5106 options = {} 5107 5108 while self._match(TokenType.ON): 5109 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5110 self.raise_error("Expected DELETE or UPDATE") 5111 5112 kind = self._prev.text.lower() 5113 5114 if self._match_text_seq("NO", "ACTION"): 5115 action = "NO ACTION" 5116 elif self._match(TokenType.SET): 5117 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5118 action = "SET " + self._prev.text.upper() 5119 else: 5120 self._advance() 5121 action = self._prev.text.upper() 5122 5123 options[kind] = action 5124 5125 return self.expression( 5126 exp.ForeignKey, 5127 expressions=expressions, 5128 reference=reference, 5129 **options, # type: ignore 5130 ) 5131 5132 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5133 return self._parse_field() 5134 5135 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5136 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5137 self._retreat(self._index - 1) 5138 return None 5139 5140 id_vars = self._parse_wrapped_id_vars() 5141 return self.expression( 5142 exp.PeriodForSystemTimeConstraint, 5143 this=seq_get(id_vars, 0), 5144 expression=seq_get(id_vars, 1), 5145 ) 5146 5147 def _parse_primary_key( 5148 self, wrapped_optional: bool = False, in_props: bool = False 5149 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5150 desc = ( 5151 self._match_set((TokenType.ASC, TokenType.DESC)) 5152 and self._prev.token_type == TokenType.DESC 5153 ) 5154 5155 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5156 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5157 5158 expressions = self._parse_wrapped_csv( 5159 self._parse_primary_key_part, optional=wrapped_optional 5160 ) 5161 options = self._parse_key_constraint_options() 5162 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5163 5164 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5165 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5166 5167 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5168 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5169 return this 5170 5171 bracket_kind = self._prev.token_type 5172 expressions = self._parse_csv( 5173 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5174 ) 5175 5176 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5177 self.raise_error("Expected ]") 5178 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5179 self.raise_error("Expected }") 5180 5181 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5182 if bracket_kind == TokenType.L_BRACE: 5183 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5184 elif not this or this.name.upper() == "ARRAY": 5185 this = self.expression(exp.Array, expressions=expressions) 5186 else: 5187 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5188 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5189 5190 self._add_comments(this) 5191 return self._parse_bracket(this) 5192 5193 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5194 if self._match(TokenType.COLON): 5195 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5196 return this 5197 5198 def _parse_case(self) -> t.Optional[exp.Expression]: 5199 ifs = [] 5200 default = None 5201 5202 comments = self._prev_comments 5203 expression = self._parse_assignment() 5204 5205 while self._match(TokenType.WHEN): 5206 this = self._parse_assignment() 5207 self._match(TokenType.THEN) 5208 then = self._parse_assignment() 5209 ifs.append(self.expression(exp.If, this=this, true=then)) 5210 5211 if self._match(TokenType.ELSE): 5212 default = self._parse_assignment() 5213 5214 if not self._match(TokenType.END): 5215 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5216 default = exp.column("interval") 5217 else: 5218 self.raise_error("Expected END after CASE", self._prev) 5219 5220 return self.expression( 5221 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5222 ) 5223 5224 def _parse_if(self) -> t.Optional[exp.Expression]: 5225 if self._match(TokenType.L_PAREN): 5226 args = self._parse_csv(self._parse_assignment) 5227 this = self.validate_expression(exp.If.from_arg_list(args), args) 5228 self._match_r_paren() 5229 else: 5230 index = self._index - 1 5231 5232 if self.NO_PAREN_IF_COMMANDS and index == 0: 5233 return self._parse_as_command(self._prev) 5234 5235 condition = self._parse_assignment() 5236 5237 if not condition: 5238 self._retreat(index) 5239 return None 5240 5241 
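            # No-paren form: IF <condition> THEN <true> [ELSE <false>] END
            # (illustrative sketch of the shape consumed below).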
self._match(TokenType.THEN) 5242 true = self._parse_assignment() 5243 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5244 self._match(TokenType.END) 5245 this = self.expression(exp.If, this=condition, true=true, false=false) 5246 5247 return this 5248 5249 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5250 if not self._match_text_seq("VALUE", "FOR"): 5251 self._retreat(self._index - 1) 5252 return None 5253 5254 return self.expression( 5255 exp.NextValueFor, 5256 this=self._parse_column(), 5257 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5258 ) 5259 5260 def _parse_extract(self) -> exp.Extract: 5261 this = self._parse_function() or self._parse_var() or self._parse_type() 5262 5263 if self._match(TokenType.FROM): 5264 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5265 5266 if not self._match(TokenType.COMMA): 5267 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5268 5269 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5270 5271 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5272 this = self._parse_assignment() 5273 5274 if not self._match(TokenType.ALIAS): 5275 if self._match(TokenType.COMMA): 5276 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5277 5278 self.raise_error("Expected AS after CAST") 5279 5280 fmt = None 5281 to = self._parse_types() 5282 5283 if self._match(TokenType.FORMAT): 5284 fmt_string = self._parse_string() 5285 fmt = self._parse_at_time_zone(fmt_string) 5286 5287 if not to: 5288 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5289 if to.this in exp.DataType.TEMPORAL_TYPES: 5290 this = self.expression( 5291 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5292 this=this, 5293 format=exp.Literal.string( 5294 format_time( 5295 fmt_string.this if fmt_string else "", 5296 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5297 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5298 ) 5299 ), 5300 ) 5301 5302 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5303 this.set("zone", fmt.args["zone"]) 5304 return this 5305 elif not to: 5306 self.raise_error("Expected TYPE after CAST") 5307 elif isinstance(to, exp.Identifier): 5308 to = exp.DataType.build(to.name, udt=True) 5309 elif to.this == exp.DataType.Type.CHAR: 5310 if self._match(TokenType.CHARACTER_SET): 5311 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5312 5313 return self.expression( 5314 exp.Cast if strict else exp.TryCast, 5315 this=this, 5316 to=to, 5317 format=fmt, 5318 safe=safe, 5319 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5320 ) 5321 5322 def _parse_string_agg(self) -> exp.Expression: 5323 if self._match(TokenType.DISTINCT): 5324 args: t.List[t.Optional[exp.Expression]] = [ 5325 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5326 ] 5327 if self._match(TokenType.COMMA): 5328 args.extend(self._parse_csv(self._parse_assignment)) 5329 else: 5330 args = self._parse_csv(self._parse_assignment) # type: ignore 5331 5332 index = self._index 5333 if not self._match(TokenType.R_PAREN) and args: 5334 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5335 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5336 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5337 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5338 5339 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5340 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5341 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5342 if not self._match_text_seq("WITHIN", "GROUP"): 5343 self._retreat(index) 5344 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5345 5346 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5347 order = self._parse_order(this=seq_get(args, 0)) 5348 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5349 5350 def _parse_convert( 5351 self, strict: bool, safe: t.Optional[bool] = None 5352 ) -> t.Optional[exp.Expression]: 5353 this = self._parse_bitwise() 5354 5355 if self._match(TokenType.USING): 5356 to: t.Optional[exp.Expression] = self.expression( 5357 exp.CharacterSet, this=self._parse_var() 5358 ) 5359 elif self._match(TokenType.COMMA): 5360 to = self._parse_types() 5361 else: 5362 to = None 5363 5364 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5365 5366 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5367 """ 5368 There are generally two variants of the DECODE function: 5369 5370 - DECODE(bin, charset) 5371 - DECODE(expression, search, result [, search, result] ... [, default]) 5372 5373 The second variant will always be parsed into a CASE expression. Note that NULL 5374 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5375 instead of relying on pattern matching. 
5376 """ 5377 args = self._parse_csv(self._parse_assignment) 5378 5379 if len(args) < 3: 5380 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5381 5382 expression, *expressions = args 5383 if not expression: 5384 return None 5385 5386 ifs = [] 5387 for search, result in zip(expressions[::2], expressions[1::2]): 5388 if not search or not result: 5389 return None 5390 5391 if isinstance(search, exp.Literal): 5392 ifs.append( 5393 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5394 ) 5395 elif isinstance(search, exp.Null): 5396 ifs.append( 5397 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5398 ) 5399 else: 5400 cond = exp.or_( 5401 exp.EQ(this=expression.copy(), expression=search), 5402 exp.and_( 5403 exp.Is(this=expression.copy(), expression=exp.Null()), 5404 exp.Is(this=search.copy(), expression=exp.Null()), 5405 copy=False, 5406 ), 5407 copy=False, 5408 ) 5409 ifs.append(exp.If(this=cond, true=result)) 5410 5411 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5412 5413 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5414 self._match_text_seq("KEY") 5415 key = self._parse_column() 5416 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5417 self._match_text_seq("VALUE") 5418 value = self._parse_bitwise() 5419 5420 if not key and not value: 5421 return None 5422 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5423 5424 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5425 if not this or not self._match_text_seq("FORMAT", "JSON"): 5426 return this 5427 5428 return self.expression(exp.FormatJson, this=this) 5429 5430 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5431 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5432 for value in values: 5433 if self._match_text_seq(value, "ON", on): 5434 return f"{value} ON {on}" 5435 5436 return None 5437 5438 @t.overload 5439 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5440 5441 @t.overload 5442 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5443 5444 def _parse_json_object(self, agg=False): 5445 star = self._parse_star() 5446 expressions = ( 5447 [star] 5448 if star 5449 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5450 ) 5451 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5452 5453 unique_keys = None 5454 if self._match_text_seq("WITH", "UNIQUE"): 5455 unique_keys = True 5456 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5457 unique_keys = False 5458 5459 self._match_text_seq("KEYS") 5460 5461 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5462 self._parse_type() 5463 ) 5464 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5465 5466 return self.expression( 5467 exp.JSONObjectAgg if agg else exp.JSONObject, 5468 expressions=expressions, 5469 null_handling=null_handling, 5470 unique_keys=unique_keys, 5471 return_type=return_type, 5472 encoding=encoding, 5473 ) 5474 5475 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5476 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5477 if not self._match_text_seq("NESTED"): 5478 this = self._parse_id_var() 5479 kind = self._parse_types(allow_identifiers=False) 5480 nested = None 5481 else: 5482 this = None 5483 kind = None 5484 nested = True 5485 5486 path = self._match_text_seq("PATH") and self._parse_string() 5487 nested_schema = nested and self._parse_json_schema() 5488 5489 return self.expression( 5490 exp.JSONColumnDef, 5491 this=this, 5492 kind=kind, 5493 path=path, 5494 nested_schema=nested_schema, 5495 ) 5496 5497 def _parse_json_schema(self) -> exp.JSONSchema: 5498 self._match_text_seq("COLUMNS") 5499 return self.expression( 5500 exp.JSONSchema, 5501 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5502 ) 5503 5504 def _parse_json_table(self) -> exp.JSONTable: 5505 this = self._parse_format_json(self._parse_bitwise()) 5506 path = self._match(TokenType.COMMA) and self._parse_string() 5507 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5508 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5509 schema = self._parse_json_schema() 5510 5511 return exp.JSONTable( 5512 this=this, 5513 schema=schema, 5514 path=path, 5515 error_handling=error_handling, 5516 empty_handling=empty_handling, 5517 ) 5518 5519 def _parse_match_against(self) -> exp.MatchAgainst: 5520 expressions = self._parse_csv(self._parse_column) 5521 5522 self._match_text_seq(")", "AGAINST", "(") 5523 5524 this = self._parse_string() 5525 5526 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5527 modifier = "IN NATURAL LANGUAGE MODE" 5528 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5529 modifier = f"{modifier} WITH QUERY EXPANSION" 5530 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5531 modifier = "IN BOOLEAN MODE" 5532 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5533 modifier = "WITH QUERY EXPANSION" 5534 else: 5535 modifier = None 5536 5537 return self.expression( 5538 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5539 ) 5540 5541 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5542 def _parse_open_json(self) -> exp.OpenJSON: 5543 this = self._parse_bitwise() 5544 path = self._match(TokenType.COMMA) and self._parse_string() 5545 5546 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5547 this = self._parse_field(any_token=True) 5548 kind = self._parse_types() 5549 path = 
self._parse_string() 5550 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5551 5552 return self.expression( 5553 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5554 ) 5555 5556 expressions = None 5557 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5558 self._match_l_paren() 5559 expressions = self._parse_csv(_parse_open_json_column_def) 5560 5561 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5562 5563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5564 args = self._parse_csv(self._parse_bitwise) 5565 5566 if self._match(TokenType.IN): 5567 return self.expression( 5568 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5569 ) 5570 5571 if haystack_first: 5572 haystack = seq_get(args, 0) 5573 needle = seq_get(args, 1) 5574 else: 5575 needle = seq_get(args, 0) 5576 haystack = seq_get(args, 1) 5577 5578 return self.expression( 5579 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5580 ) 5581 5582 def _parse_predict(self) -> exp.Predict: 5583 self._match_text_seq("MODEL") 5584 this = self._parse_table() 5585 5586 self._match(TokenType.COMMA) 5587 self._match_text_seq("TABLE") 5588 5589 return self.expression( 5590 exp.Predict, 5591 this=this, 5592 expression=self._parse_table(), 5593 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5594 ) 5595 5596 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5597 args = self._parse_csv(self._parse_table) 5598 return exp.JoinHint(this=func_name.upper(), expressions=args) 5599 5600 def _parse_substring(self) -> exp.Substring: 5601 # Postgres supports the form: substring(string [from int] [for int]) 5602 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5603 5604 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5605 5606 if self._match(TokenType.FROM): 5607 args.append(self._parse_bitwise()) 5608 if self._match(TokenType.FOR): 5609 if len(args) == 1: 5610 args.append(exp.Literal.number(1)) 5611 args.append(self._parse_bitwise()) 5612 5613 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5614 5615 def _parse_trim(self) -> exp.Trim: 5616 # https://www.w3resource.com/sql/character-functions/trim.php 5617 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5618 5619 position = None 5620 collation = None 5621 expression = None 5622 5623 if self._match_texts(self.TRIM_TYPES): 5624 position = self._prev.text.upper() 5625 5626 this = self._parse_bitwise() 5627 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5628 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5629 expression = self._parse_bitwise() 5630 5631 if invert_order: 5632 this, expression = expression, this 5633 5634 if self._match(TokenType.COLLATE): 5635 collation = self._parse_bitwise() 5636 5637 return self.expression( 5638 exp.Trim, this=this, position=position, expression=expression, collation=collation 5639 ) 5640 5641 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5642 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5643 5644 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5645 return self._parse_window(self._parse_id_var(), alias=True) 5646 5647 def _parse_respect_or_ignore_nulls( 5648 self, this: t.Optional[exp.Expression] 5649 ) -> t.Optional[exp.Expression]: 5650 if self._match_text_seq("IGNORE", "NULLS"): 
5651 return self.expression(exp.IgnoreNulls, this=this) 5652 if self._match_text_seq("RESPECT", "NULLS"): 5653 return self.expression(exp.RespectNulls, this=this) 5654 return this 5655 5656 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5657 if self._match(TokenType.HAVING): 5658 self._match_texts(("MAX", "MIN")) 5659 max = self._prev.text.upper() != "MIN" 5660 return self.expression( 5661 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5662 ) 5663 5664 return this 5665 5666 def _parse_window( 5667 self, this: t.Optional[exp.Expression], alias: bool = False 5668 ) -> t.Optional[exp.Expression]: 5669 func = this 5670 comments = func.comments if isinstance(func, exp.Expression) else None 5671 5672 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5673 self._match(TokenType.WHERE) 5674 this = self.expression( 5675 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5676 ) 5677 self._match_r_paren() 5678 5679 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5680 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5681 if self._match_text_seq("WITHIN", "GROUP"): 5682 order = self._parse_wrapped(self._parse_order) 5683 this = self.expression(exp.WithinGroup, this=this, expression=order) 5684 5685 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5686 # Some dialects choose to implement it and some do not. 5687 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5688 5689 # There is some code above in _parse_lambda that handles 5690 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5691 5692 # The code below handles 5693 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5694 5695 # Oracle allows both formats 5696 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5697 # and Snowflake chose to do the same for familiarity 5698 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5699 if isinstance(this, exp.AggFunc): 5700 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5701 5702 if ignore_respect and ignore_respect is not this: 5703 ignore_respect.replace(ignore_respect.this) 5704 this = self.expression(ignore_respect.__class__, this=this) 5705 5706 this = self._parse_respect_or_ignore_nulls(this) 5707 5708 # bigquery select from window x AS (partition by ...)
5709 if alias: 5710 over = None 5711 self._match(TokenType.ALIAS) 5712 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5713 return this 5714 else: 5715 over = self._prev.text.upper() 5716 5717 if comments and isinstance(func, exp.Expression): 5718 func.pop_comments() 5719 5720 if not self._match(TokenType.L_PAREN): 5721 return self.expression( 5722 exp.Window, 5723 comments=comments, 5724 this=this, 5725 alias=self._parse_id_var(False), 5726 over=over, 5727 ) 5728 5729 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5730 5731 first = self._match(TokenType.FIRST) 5732 if self._match_text_seq("LAST"): 5733 first = False 5734 5735 partition, order = self._parse_partition_and_order() 5736 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5737 5738 if kind: 5739 self._match(TokenType.BETWEEN) 5740 start = self._parse_window_spec() 5741 self._match(TokenType.AND) 5742 end = self._parse_window_spec() 5743 5744 spec = self.expression( 5745 exp.WindowSpec, 5746 kind=kind, 5747 start=start["value"], 5748 start_side=start["side"], 5749 end=end["value"], 5750 end_side=end["side"], 5751 ) 5752 else: 5753 spec = None 5754 5755 self._match_r_paren() 5756 5757 window = self.expression( 5758 exp.Window, 5759 comments=comments, 5760 this=this, 5761 partition_by=partition, 5762 order=order, 5763 spec=spec, 5764 alias=window_alias, 5765 over=over, 5766 first=first, 5767 ) 5768 5769 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5770 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5771 return self._parse_window(window, alias=alias) 5772 5773 return window 5774 5775 def _parse_partition_and_order( 5776 self, 5777 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5778 return self._parse_partition_by(), self._parse_order() 5779 5780 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5781 self._match(TokenType.BETWEEN) 5782 5783 return { 5784 "value": ( 5785 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5786 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5787 or self._parse_bitwise() 5788 ), 5789 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5790 } 5791 5792 def _parse_alias( 5793 self, this: t.Optional[exp.Expression], explicit: bool = False 5794 ) -> t.Optional[exp.Expression]: 5795 any_token = self._match(TokenType.ALIAS) 5796 comments = self._prev_comments or [] 5797 5798 if explicit and not any_token: 5799 return this 5800 5801 if self._match(TokenType.L_PAREN): 5802 aliases = self.expression( 5803 exp.Aliases, 5804 comments=comments, 5805 this=this, 5806 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5807 ) 5808 self._match_r_paren(aliases) 5809 return aliases 5810 5811 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5812 self.STRING_ALIASES and self._parse_string_as_identifier() 5813 ) 5814 5815 if alias: 5816 comments.extend(alias.pop_comments()) 5817 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5818 column = this.this 5819 5820 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5821 if not this.comments and column and column.comments: 5822 this.comments = column.pop_comments() 5823 5824 return this 5825 5826 def _parse_id_var( 5827 self, 5828 any_token: bool = True, 5829 tokens: t.Optional[t.Collection[TokenType]] = None, 5830 ) -> t.Optional[exp.Expression]: 5831 expression = self._parse_identifier() 5832 if 
not expression and ( 5833 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5834 ): 5835 quoted = self._prev.token_type == TokenType.STRING 5836 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5837 5838 return expression 5839 5840 def _parse_string(self) -> t.Optional[exp.Expression]: 5841 if self._match_set(self.STRING_PARSERS): 5842 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5843 return self._parse_placeholder() 5844 5845 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5846 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5847 5848 def _parse_number(self) -> t.Optional[exp.Expression]: 5849 if self._match_set(self.NUMERIC_PARSERS): 5850 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5851 return self._parse_placeholder() 5852 5853 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5854 if self._match(TokenType.IDENTIFIER): 5855 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5856 return self._parse_placeholder() 5857 5858 def _parse_var( 5859 self, 5860 any_token: bool = False, 5861 tokens: t.Optional[t.Collection[TokenType]] = None, 5862 upper: bool = False, 5863 ) -> t.Optional[exp.Expression]: 5864 if ( 5865 (any_token and self._advance_any()) 5866 or self._match(TokenType.VAR) 5867 or (self._match_set(tokens) if tokens else False) 5868 ): 5869 return self.expression( 5870 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5871 ) 5872 return self._parse_placeholder() 5873 5874 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5875 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5876 self._advance() 5877 return self._prev 5878 return None 5879 5880 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5881 return self._parse_var() or self._parse_string() 5882 5883 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5884 return self._parse_primary() or self._parse_var(any_token=True) 5885 5886 def _parse_null(self) -> t.Optional[exp.Expression]: 5887 if self._match_set(self.NULL_TOKENS): 5888 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5889 return self._parse_placeholder() 5890 5891 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5892 if self._match(TokenType.TRUE): 5893 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5894 if self._match(TokenType.FALSE): 5895 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5896 return self._parse_placeholder() 5897 5898 def _parse_star(self) -> t.Optional[exp.Expression]: 5899 if self._match(TokenType.STAR): 5900 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5901 return self._parse_placeholder() 5902 5903 def _parse_parameter(self) -> exp.Parameter: 5904 this = self._parse_identifier() or self._parse_primary_or_var() 5905 return self.expression(exp.Parameter, this=this) 5906 5907 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5908 if self._match_set(self.PLACEHOLDER_PARSERS): 5909 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5910 if placeholder: 5911 return placeholder 5912 self._advance(-1) 5913 return None 5914 5915 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5916 if not self._match_texts(keywords): 5917 return None 5918 if self._match(TokenType.L_PAREN, advance=False): 5919 return 
self._parse_wrapped_csv(self._parse_expression) 5920 5921 expression = self._parse_expression() 5922 return [expression] if expression else None 5923 5924 def _parse_csv( 5925 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5926 ) -> t.List[exp.Expression]: 5927 parse_result = parse_method() 5928 items = [parse_result] if parse_result is not None else [] 5929 5930 while self._match(sep): 5931 self._add_comments(parse_result) 5932 parse_result = parse_method() 5933 if parse_result is not None: 5934 items.append(parse_result) 5935 5936 return items 5937 5938 def _parse_tokens( 5939 self, parse_method: t.Callable, expressions: t.Dict 5940 ) -> t.Optional[exp.Expression]: 5941 this = parse_method() 5942 5943 while self._match_set(expressions): 5944 this = self.expression( 5945 expressions[self._prev.token_type], 5946 this=this, 5947 comments=self._prev_comments, 5948 expression=parse_method(), 5949 ) 5950 5951 return this 5952 5953 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5954 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5955 5956 def _parse_wrapped_csv( 5957 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5958 ) -> t.List[exp.Expression]: 5959 return self._parse_wrapped( 5960 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5961 ) 5962 5963 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5964 wrapped = self._match(TokenType.L_PAREN) 5965 if not wrapped and not optional: 5966 self.raise_error("Expecting (") 5967 parse_result = parse_method() 5968 if wrapped: 5969 self._match_r_paren() 5970 return parse_result 5971 5972 def _parse_expressions(self) -> t.List[exp.Expression]: 5973 return self._parse_csv(self._parse_expression) 5974 5975 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5976 return self._parse_select() or self._parse_set_operations( 5977 self._parse_expression() if alias else self._parse_assignment() 5978 ) 5979 5980 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5981 return self._parse_query_modifiers( 5982 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5983 ) 5984 5985 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5986 this = None 5987 if self._match_texts(self.TRANSACTION_KIND): 5988 this = self._prev.text 5989 5990 self._match_texts(("TRANSACTION", "WORK")) 5991 5992 modes = [] 5993 while True: 5994 mode = [] 5995 while self._match(TokenType.VAR): 5996 mode.append(self._prev.text) 5997 5998 if mode: 5999 modes.append(" ".join(mode)) 6000 if not self._match(TokenType.COMMA): 6001 break 6002 6003 return self.expression(exp.Transaction, this=this, modes=modes) 6004 6005 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6006 chain = None 6007 savepoint = None 6008 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6009 6010 self._match_texts(("TRANSACTION", "WORK")) 6011 6012 if self._match_text_seq("TO"): 6013 self._match_text_seq("SAVEPOINT") 6014 savepoint = self._parse_id_var() 6015 6016 if self._match(TokenType.AND): 6017 chain = not self._match_text_seq("NO") 6018 self._match_text_seq("CHAIN") 6019 6020 if is_rollback: 6021 return self.expression(exp.Rollback, savepoint=savepoint) 6022 6023 return self.expression(exp.Commit, chain=chain) 6024 6025 def _parse_refresh(self) -> exp.Refresh: 6026 self._match(TokenType.TABLE) 6027 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6028 6029 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6030 if not self._match_text_seq("ADD"): 6031 return None 6032 6033 self._match(TokenType.COLUMN) 6034 exists_column = self._parse_exists(not_=True) 6035 expression = self._parse_field_def() 6036 6037 if expression: 6038 expression.set("exists", exists_column) 6039 6040 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6041 if self._match_texts(("FIRST", "AFTER")): 6042 position = self._prev.text 6043 column_position = self.expression( 6044 exp.ColumnPosition, this=self._parse_column(), position=position 6045 ) 6046 expression.set("position", column_position) 6047 6048 return expression 6049 6050 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6051 drop = self._match(TokenType.DROP) and self._parse_drop() 6052 if drop and not isinstance(drop, exp.Command): 6053 drop.set("kind", drop.args.get("kind", "COLUMN")) 6054 return drop 6055 6056 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6057 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6058 return self.expression( 6059 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6060 ) 6061 6062 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6063 index = self._index - 1 6064 6065 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6066 return self._parse_csv( 6067 lambda: self.expression( 6068 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6069 ) 6070 ) 6071 6072 self._retreat(index) 6073 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6074 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6075 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6076 6077 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6078 if self._match_texts(self.ALTER_ALTER_PARSERS): 6079 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6080 6081 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6082 # keyword after ALTER we default to parsing this statement 6083 self._match(TokenType.COLUMN) 6084 column = self._parse_field(any_token=True) 6085 6086 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6087 return self.expression(exp.AlterColumn, this=column, drop=True) 6088 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6089 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6090 if self._match(TokenType.COMMENT): 6091 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6092 if self._match_text_seq("DROP", "NOT", "NULL"): 6093 return self.expression( 6094 exp.AlterColumn, 6095 this=column, 6096 drop=True, 6097 allow_null=True, 6098 ) 6099 if self._match_text_seq("SET", "NOT", "NULL"): 6100 return self.expression( 6101 exp.AlterColumn, 6102 this=column, 6103 allow_null=False, 6104 ) 6105 self._match_text_seq("SET", "DATA") 6106 self._match_text_seq("TYPE") 6107 return self.expression( 6108 exp.AlterColumn, 6109 this=column, 6110 dtype=self._parse_types(), 6111 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6112 using=self._match(TokenType.USING) and self._parse_assignment(), 6113 ) 6114 6115 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6116 if self._match_texts(("ALL", "EVEN", "AUTO")): 6117 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6118 6119 self._match_text_seq("KEY", "DISTKEY") 6120 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6121 6122 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6123 if compound: 6124 self._match_text_seq("SORTKEY") 6125 6126 if self._match(TokenType.L_PAREN, advance=False): 6127 return self.expression( 6128 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6129 ) 6130 6131 self._match_texts(("AUTO", "NONE")) 6132 return self.expression( 6133 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6134 ) 6135 6136 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6137 index = self._index - 1 6138 6139 partition_exists = self._parse_exists() 6140 if self._match(TokenType.PARTITION, advance=False): 6141 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6142 6143 self._retreat(index) 6144 return self._parse_csv(self._parse_drop_column) 6145 6146 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6147 if self._match(TokenType.COLUMN): 6148 exists = self._parse_exists() 6149 old_column = self._parse_column() 6150 to = self._match_text_seq("TO") 6151 new_column = self._parse_column() 6152 6153 if old_column is None or to is None or new_column is None: 6154 return None 6155 6156 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6157 6158 self._match_text_seq("TO") 6159 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6160 6161 def _parse_alter_table_set(self) -> exp.AlterSet: 6162 alter_set = self.expression(exp.AlterSet) 6163 6164 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6165 "TABLE", "PROPERTIES" 6166 ): 6167 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6168 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6169 alter_set.set("expressions", [self._parse_assignment()]) 6170 elif self._match_texts(("LOGGED", "UNLOGGED")): 6171 alter_set.set("option", exp.var(self._prev.text.upper())) 6172 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6173 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6174 elif self._match_text_seq("LOCATION"): 6175 alter_set.set("location", self._parse_field()) 6176 elif self._match_text_seq("ACCESS", "METHOD"): 6177 alter_set.set("access_method", self._parse_field()) 6178 elif self._match_text_seq("TABLESPACE"): 6179 alter_set.set("tablespace", self._parse_field()) 6180 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6181 alter_set.set("file_format", [self._parse_field()]) 6182 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6183 alter_set.set("file_format", self._parse_wrapped_options()) 6184 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6185 alter_set.set("copy_options", self._parse_wrapped_options()) 6186 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6187 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6188 else: 6189 if self._match_text_seq("SERDE"): 6190 alter_set.set("serde", self._parse_field()) 6191 6192 alter_set.set("expressions", [self._parse_properties()]) 6193 6194 return alter_set 6195 6196 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6197 start = self._prev 6198 6199 if not self._match(TokenType.TABLE): 6200 return 
self._parse_as_command(start) 6201 6202 exists = self._parse_exists() 6203 only = self._match_text_seq("ONLY") 6204 this = self._parse_table(schema=True) 6205 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6206 6207 if self._next: 6208 self._advance() 6209 6210 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6211 if parser: 6212 actions = ensure_list(parser(self)) 6213 options = self._parse_csv(self._parse_property) 6214 6215 if not self._curr and actions: 6216 return self.expression( 6217 exp.AlterTable, 6218 this=this, 6219 exists=exists, 6220 actions=actions, 6221 only=only, 6222 options=options, 6223 cluster=cluster, 6224 ) 6225 6226 return self._parse_as_command(start) 6227 6228 def _parse_merge(self) -> exp.Merge: 6229 self._match(TokenType.INTO) 6230 target = self._parse_table() 6231 6232 if target and self._match(TokenType.ALIAS, advance=False): 6233 target.set("alias", self._parse_table_alias()) 6234 6235 self._match(TokenType.USING) 6236 using = self._parse_table() 6237 6238 self._match(TokenType.ON) 6239 on = self._parse_assignment() 6240 6241 return self.expression( 6242 exp.Merge, 6243 this=target, 6244 using=using, 6245 on=on, 6246 expressions=self._parse_when_matched(), 6247 ) 6248 6249 def _parse_when_matched(self) -> t.List[exp.When]: 6250 whens = [] 6251 6252 while self._match(TokenType.WHEN): 6253 matched = not self._match(TokenType.NOT) 6254 self._match_text_seq("MATCHED") 6255 source = ( 6256 False 6257 if self._match_text_seq("BY", "TARGET") 6258 else self._match_text_seq("BY", "SOURCE") 6259 ) 6260 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6261 6262 self._match(TokenType.THEN) 6263 6264 if self._match(TokenType.INSERT): 6265 _this = self._parse_star() 6266 if _this: 6267 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6268 else: 6269 then = self.expression( 6270 exp.Insert, 6271 this=self._parse_value(), 6272 expression=self._match_text_seq("VALUES") and self._parse_value(), 6273 ) 6274 elif self._match(TokenType.UPDATE): 6275 expressions = self._parse_star() 6276 if expressions: 6277 then = self.expression(exp.Update, expressions=expressions) 6278 else: 6279 then = self.expression( 6280 exp.Update, 6281 expressions=self._match(TokenType.SET) 6282 and self._parse_csv(self._parse_equality), 6283 ) 6284 elif self._match(TokenType.DELETE): 6285 then = self.expression(exp.Var, this=self._prev.text) 6286 else: 6287 then = None 6288 6289 whens.append( 6290 self.expression( 6291 exp.When, 6292 matched=matched, 6293 source=source, 6294 condition=condition, 6295 then=then, 6296 ) 6297 ) 6298 return whens 6299 6300 def _parse_show(self) -> t.Optional[exp.Expression]: 6301 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6302 if parser: 6303 return parser(self) 6304 return self._parse_as_command(self._prev) 6305 6306 def _parse_set_item_assignment( 6307 self, kind: t.Optional[str] = None 6308 ) -> t.Optional[exp.Expression]: 6309 index = self._index 6310 6311 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6312 return self._parse_set_transaction(global_=kind == "GLOBAL") 6313 6314 left = self._parse_primary() or self._parse_column() 6315 assignment_delimiter = self._match_texts(("=", "TO")) 6316 6317 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6318 self._retreat(index) 6319 return None 6320 6321 right = self._parse_statement() or self._parse_id_var() 6322 if isinstance(right, 
(exp.Column, exp.Identifier)): 6323 right = exp.var(right.name) 6324 6325 this = self.expression(exp.EQ, this=left, expression=right) 6326 return self.expression(exp.SetItem, this=this, kind=kind) 6327 6328 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6329 self._match_text_seq("TRANSACTION") 6330 characteristics = self._parse_csv( 6331 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6332 ) 6333 return self.expression( 6334 exp.SetItem, 6335 expressions=characteristics, 6336 kind="TRANSACTION", 6337 **{"global": global_}, # type: ignore 6338 ) 6339 6340 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6341 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6342 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6343 6344 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6345 index = self._index 6346 set_ = self.expression( 6347 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6348 ) 6349 6350 if self._curr: 6351 self._retreat(index) 6352 return self._parse_as_command(self._prev) 6353 6354 return set_ 6355 6356 def _parse_var_from_options( 6357 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6358 ) -> t.Optional[exp.Var]: 6359 start = self._curr 6360 if not start: 6361 return None 6362 6363 option = start.text.upper() 6364 continuations = options.get(option) 6365 6366 index = self._index 6367 self._advance() 6368 for keywords in continuations or []: 6369 if isinstance(keywords, str): 6370 keywords = (keywords,) 6371 6372 if self._match_text_seq(*keywords): 6373 option = f"{option} {' '.join(keywords)}" 6374 break 6375 else: 6376 if continuations or continuations is None: 6377 if raise_unmatched: 6378 self.raise_error(f"Unknown option {option}") 6379 6380 self._retreat(index) 6381 return None 6382 6383 return exp.var(option) 6384 6385 def _parse_as_command(self, start: Token) -> exp.Command: 6386 while self._curr: 6387 self._advance() 6388 text = self._find_sql(start, self._prev) 6389 size = len(start.text) 6390 self._warn_unsupported() 6391 return exp.Command(this=text[:size], expression=text[size:]) 6392 6393 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6394 settings = [] 6395 6396 self._match_l_paren() 6397 kind = self._parse_id_var() 6398 6399 if self._match(TokenType.L_PAREN): 6400 while True: 6401 key = self._parse_id_var() 6402 value = self._parse_primary() 6403 6404 if not key and value is None: 6405 break 6406 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6407 self._match(TokenType.R_PAREN) 6408 6409 self._match_r_paren() 6410 6411 return self.expression( 6412 exp.DictProperty, 6413 this=this, 6414 kind=kind.this if kind else None, 6415 settings=settings, 6416 ) 6417 6418 def _parse_dict_range(self, this: str) -> exp.DictRange: 6419 self._match_l_paren() 6420 has_min = self._match_text_seq("MIN") 6421 if has_min: 6422 min = self._parse_var() or self._parse_primary() 6423 self._match_text_seq("MAX") 6424 max = self._parse_var() or self._parse_primary() 6425 else: 6426 max = self._parse_var() or self._parse_primary() 6427 min = exp.Literal.number(0) 6428 self._match_r_paren() 6429 return self.expression(exp.DictRange, this=this, min=min, max=max) 6430 6431 def _parse_comprehension( 6432 self, this: t.Optional[exp.Expression] 6433 ) -> t.Optional[exp.Comprehension]: 6434 index = self._index 6435 expression = self._parse_column() 6436 if not 
self._match(TokenType.IN): 6437 self._retreat(index - 1) 6438 return None 6439 iterator = self._parse_column() 6440 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6441 return self.expression( 6442 exp.Comprehension, 6443 this=this, 6444 expression=expression, 6445 iterator=iterator, 6446 condition=condition, 6447 ) 6448 6449 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6450 if self._match(TokenType.HEREDOC_STRING): 6451 return self.expression(exp.Heredoc, this=self._prev.text) 6452 6453 if not self._match_text_seq("$"): 6454 return None 6455 6456 tags = ["$"] 6457 tag_text = None 6458 6459 if self._is_connected(): 6460 self._advance() 6461 tags.append(self._prev.text.upper()) 6462 else: 6463 self.raise_error("No closing $ found") 6464 6465 if tags[-1] != "$": 6466 if self._is_connected() and self._match_text_seq("$"): 6467 tag_text = tags[-1] 6468 tags.append("$") 6469 else: 6470 self.raise_error("No closing $ found") 6471 6472 heredoc_start = self._curr 6473 6474 while self._curr: 6475 if self._match_text_seq(*tags, advance=False): 6476 this = self._find_sql(heredoc_start, self._prev) 6477 self._advance(len(tags)) 6478 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6479 6480 self._advance() 6481 6482 self.raise_error(f"No closing {''.join(tags)} found") 6483 return None 6484 6485 def _find_parser( 6486 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6487 ) -> t.Optional[t.Callable]: 6488 if not self._curr: 6489 return None 6490 6491 index = self._index 6492 this = [] 6493 while True: 6494 # The current token might be multiple words 6495 curr = self._curr.text.upper() 6496 key = curr.split(" ") 6497 this.append(curr) 6498 6499 self._advance() 6500 result, trie = in_trie(trie, key) 6501 if result == TrieResult.FAILED: 6502 break 6503 6504 if result == TrieResult.EXISTS: 6505 subparser = parsers[" ".join(this)] 6506 return subparser 6507 6508 self._retreat(index) 6509 return None 6510 6511 def _match(self, token_type, advance=True, expression=None): 6512 if not self._curr: 6513 return None 6514 6515 if self._curr.token_type == token_type: 6516 if advance: 6517 self._advance() 6518 self._add_comments(expression) 6519 return True 6520 6521 return None 6522 6523 def _match_set(self, types, advance=True): 6524 if not self._curr: 6525 return None 6526 6527 if self._curr.token_type in types: 6528 if advance: 6529 self._advance() 6530 return True 6531 6532 return None 6533 6534 def _match_pair(self, token_type_a, token_type_b, advance=True): 6535 if not self._curr or not self._next: 6536 return None 6537 6538 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6539 if advance: 6540 self._advance(2) 6541 return True 6542 6543 return None 6544 6545 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6546 if not self._match(TokenType.L_PAREN, expression=expression): 6547 self.raise_error("Expecting (") 6548 6549 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6550 if not self._match(TokenType.R_PAREN, expression=expression): 6551 self.raise_error("Expecting )") 6552 6553 def _match_texts(self, texts, advance=True): 6554 if self._curr and self._curr.text.upper() in texts: 6555 if advance: 6556 self._advance() 6557 return True 6558 return None 6559 6560 def _match_text_seq(self, *texts, advance=True): 6561 index = self._index 6562 for text in texts: 6563 if self._curr and self._curr.text.upper() == text: 6564 self._advance() 6565 else: 6566 
self._retreat(index) 6567 return None 6568 6569 if not advance: 6570 self._retreat(index) 6571 6572 return True 6573 6574 def _replace_lambda( 6575 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6576 ) -> t.Optional[exp.Expression]: 6577 if not node: 6578 return node 6579 6580 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6581 6582 for column in node.find_all(exp.Column): 6583 typ = lambda_types.get(column.parts[0].name) 6584 if typ is not None: 6585 dot_or_id = column.to_dot() if column.table else column.this 6586 6587 if typ: 6588 dot_or_id = self.expression( 6589 exp.Cast, 6590 this=dot_or_id, 6591 to=typ, 6592 ) 6593 6594 parent = column.parent 6595 6596 while isinstance(parent, exp.Dot): 6597 if not isinstance(parent.parent, exp.Dot): 6598 parent.replace(dot_or_id) 6599 break 6600 parent = parent.parent 6601 else: 6602 if column is node: 6603 node = dot_or_id 6604 else: 6605 column.replace(dot_or_id) 6606 return node 6607 6608 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6609 start = self._prev 6610 6611 # Not to be confused with TRUNCATE(number, decimals) function call 6612 if self._match(TokenType.L_PAREN): 6613 self._retreat(self._index - 2) 6614 return self._parse_function() 6615 6616 # Clickhouse supports TRUNCATE DATABASE as well 6617 is_database = self._match(TokenType.DATABASE) 6618 6619 self._match(TokenType.TABLE) 6620 6621 exists = self._parse_exists(not_=False) 6622 6623 expressions = self._parse_csv( 6624 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6625 ) 6626 6627 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6628 6629 if self._match_text_seq("RESTART", "IDENTITY"): 6630 identity = "RESTART" 6631 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6632 identity = "CONTINUE" 6633 else: 6634 identity = None 6635 6636 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6637 option = self._prev.text 6638 else: 6639 option = None 6640 6641 partition = self._parse_partition() 6642 6643 # Fallback case 6644 if self._curr: 6645 return self._parse_as_command(start) 6646 6647 return self.expression( 6648 exp.TruncateTable, 6649 expressions=expressions, 6650 is_database=is_database, 6651 exists=exists, 6652 cluster=cluster, 6653 identity=identity, 6654 option=option, 6655 partition=partition, 6656 ) 6657 6658 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6659 this = self._parse_ordered(self._parse_opclass) 6660 6661 if not self._match(TokenType.WITH): 6662 return this 6663 6664 op = self._parse_var(any_token=True) 6665 6666 return self.expression(exp.WithOperator, this=this, op=op) 6667 6668 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6669 self._match(TokenType.EQ) 6670 self._match(TokenType.L_PAREN) 6671 6672 opts: t.List[t.Optional[exp.Expression]] = [] 6673 while self._curr and not self._match(TokenType.R_PAREN): 6674 if self._match_text_seq("FORMAT_NAME", "="): 6675 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6676 # so we parse it separately to use _parse_field() 6677 prop = self.expression( 6678 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6679 ) 6680 opts.append(prop) 6681 else: 6682 opts.append(self._parse_property()) 6683 6684 self._match(TokenType.COMMA) 6685 6686 return opts 6687 6688 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6689 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6690 6691 options = [] 6692 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6693 option = self._parse_var(any_token=True) 6694 prev = self._prev.text.upper() 6695 6696 # Different dialects might separate options and values by white space, "=" and "AS" 6697 self._match(TokenType.EQ) 6698 self._match(TokenType.ALIAS) 6699 6700 param = self.expression(exp.CopyParameter, this=option) 6701 6702 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6703 TokenType.L_PAREN, advance=False 6704 ): 6705 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6706 param.set("expressions", self._parse_wrapped_options()) 6707 elif prev == "FILE_FORMAT": 6708 # T-SQL's external file format case 6709 param.set("expression", self._parse_field()) 6710 else: 6711 param.set("expression", self._parse_unquoted_field()) 6712 6713 options.append(param) 6714 self._match(sep) 6715 6716 return options 6717 6718 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6719 expr = self.expression(exp.Credentials) 6720 6721 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6722 expr.set("storage", self._parse_field()) 6723 if self._match_text_seq("CREDENTIALS"): 6724 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6725 creds = ( 6726 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6727 ) 6728 expr.set("credentials", creds) 6729 if self._match_text_seq("ENCRYPTION"): 6730 expr.set("encryption", self._parse_wrapped_options()) 6731 if self._match_text_seq("IAM_ROLE"): 6732 expr.set("iam_role", self._parse_field()) 6733 if self._match_text_seq("REGION"): 6734 expr.set("region", self._parse_field()) 6735 6736 return expr 6737 6738 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6739 return self._parse_field() 6740 6741 def _parse_copy(self) -> exp.Copy | exp.Command: 6742 start = self._prev 6743 6744 self._match(TokenType.INTO) 6745 6746 this = ( 6747 self._parse_select(nested=True, parse_subquery_alias=False) 6748 if self._match(TokenType.L_PAREN, advance=False) 6749 else self._parse_table(schema=True) 6750 ) 6751 6752 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6753 6754 files = self._parse_csv(self._parse_file_location) 6755 credentials = self._parse_credentials() 6756 6757 self._match_text_seq("WITH") 6758 6759 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6760 6761 # Fallback case 6762 if self._curr: 6763 return self._parse_as_command(start) 6764 6765 return self.expression( 6766 exp.Copy, 6767 this=this, 6768 kind=kind, 6769 credentials=credentials, 6770 files=files, 6771 params=params, 6772 )
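A few of the routines above are easiest to understand through the public entry points. First, the manual WITHIN GROUP handling in _parse_string_agg exists so that STRING_AGG can be transpiled to GROUP_CONCAT-style dialects; a minimal sketch (the exact rendered output is illustrative and may vary by version):

    import sqlglot

    # T-SQL's STRING_AGG ... WITHIN GROUP (ORDER BY ...) parses into GroupConcat,
    # so writing it back out as MySQL should yield a GROUP_CONCAT call.
    sql = "SELECT STRING_AGG(name, ',') WITHIN GROUP (ORDER BY name) FROM t"
    print(sqlglot.transpile(sql, read="tsql", write="mysql")[0])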
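Likewise, the second DECODE variant described in _parse_decode's docstring always lowers to CASE, with a NULL search value becoming an explicit IS NULL test. A sketch, assuming the active dialect routes DECODE to this routine (the expected output in the comment is indicative, not authoritative):

    import sqlglot
    from sqlglot import exp

    tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', NULL, 'missing', 'other') FROM t")
    print(tree.find(exp.Case).sql())
    # expected along the lines of:
    # CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END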
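_parse_substring normalizes the Postgres substring(string FROM int FOR int) form into ordinary positional arguments; a quick, hedged check:

    import sqlglot

    # The FROM/FOR keywords are folded into the argument list before validation.
    print(sqlglot.parse_one("SELECT SUBSTRING('abcdef' FROM 2 FOR 3)").sql())
    # expected something like: SELECT SUBSTRING('abcdef', 2, 3)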
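Finally, the IGNORE | RESPECT NULLS normalization in _parse_window means both accepted placements should produce the same tree; a small check under the default dialect (assuming it accepts both forms, per the comments above):

    import sqlglot

    a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
    print(a.sql() == b.sql())  # expected: True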
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
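For orientation, a small sketch of what this builder produces; the literal arguments here are hypothetical, and the repr shapes shown in the comments may vary by version:

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    # Alternating key/value arguments are split into parallel key and value arrays.
    node = build_var_map(
        [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
    )
    print(repr(node))  # a VarMap with keys ['a', 'b'] and values [1, 2]

    # A single star argument short-circuits into StarMap.
    print(repr(build_var_map([exp.Star()])))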
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
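This factory is typically used to populate token-to-parser tables in dialect subclasses. A hypothetical sketch of building such callbacks (the names parse_ilike and parse_rlike are made up for illustration):

    from sqlglot import exp
    from sqlglot.parser import binary_range_parser

    # Each call returns a callback: given the parser and the left-hand expression,
    # it parses the right-hand side with _parse_bitwise, builds the node, and lets
    # _parse_escape pick up a trailing ESCAPE clause.
    parse_ilike = binary_range_parser(exp.ILike)

    # reverse_args=True swaps the operands, for operators whose SQL surface order
    # is the mirror of the AST's this/expression slots.
    parse_rlike = binary_range_parser(exp.RegexpLike, reverse_args=True)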
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
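Two dialect switches govern this builder: LOG_BASE_FIRST controls the two-argument order, and LOG_DEFAULTS_TO_LN decides whether one-argument LOG means natural log. A quick illustration (the tree shapes printed are indicative only):

    import sqlglot

    # One argument: Ln if the active dialect's parser sets LOG_DEFAULTS_TO_LN,
    # otherwise Log with no explicit base.
    print(repr(sqlglot.parse_one("LOG(x)")))

    # Two arguments: base first by default; dialects with LOG_BASE_FIRST = False
    # get the operands swapped as above.
    print(repr(sqlglot.parse_one("LOG(10, x)")))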
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
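The second argument is run through dialect.to_json_path, so the path ends up as a structured node rather than a bare string; a short sketch under the default dialect:

    import sqlglot

    # JSON_EXTRACT's path argument is normalized into a JSON path expression,
    # which is what lets paths survive transpilation between dialects.
    node = sqlglot.parse_one("JSON_EXTRACT(doc, '$.a[0].b')")
    print(repr(node.expression))  # a parsed JSON path, not a raw string literal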
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
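The parenthesization matters once Mod is rendered with the % operator; a one-line check, assuming the default dialect renders Mod as % (expected output shown as a comment):

    import sqlglot

    # Without the Paren wrapper the rendered SQL would read a + 1 % 7, which
    # binds differently; the builder preserves MOD's grouping.
    print(sqlglot.parse_one("MOD(a + 1, 7)").sql())  # expected: (a + 1) % 7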
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LOWCARDINALITY, 198 TokenType.MAP, 199 TokenType.NULLABLE, 200 *STRUCT_TYPE_TOKENS, 201 } 202 203 ENUM_TYPE_TOKENS = { 204 TokenType.ENUM, 205 TokenType.ENUM8, 206 TokenType.ENUM16, 207 } 208 209 AGGREGATE_TYPE_TOKENS = { 210 TokenType.AGGREGATEFUNCTION, 211 TokenType.SIMPLEAGGREGATEFUNCTION, 212 } 213 214 TYPE_TOKENS = { 215 TokenType.BIT, 216 TokenType.BOOLEAN, 217 TokenType.TINYINT, 218 TokenType.UTINYINT, 219 TokenType.SMALLINT, 220 TokenType.USMALLINT, 221 TokenType.INT, 222 TokenType.UINT, 223 TokenType.BIGINT, 224 TokenType.UBIGINT, 225 TokenType.INT128, 226 TokenType.UINT128, 227 TokenType.INT256, 228 TokenType.UINT256, 229 TokenType.MEDIUMINT, 230 TokenType.UMEDIUMINT, 231 TokenType.FIXEDSTRING, 232 TokenType.FLOAT, 233 TokenType.DOUBLE, 234 TokenType.CHAR, 235 TokenType.NCHAR, 236 TokenType.VARCHAR, 237 TokenType.NVARCHAR, 238 TokenType.BPCHAR, 239 TokenType.TEXT, 240 
TokenType.MEDIUMTEXT, 241 TokenType.LONGTEXT, 242 TokenType.MEDIUMBLOB, 243 TokenType.LONGBLOB, 244 TokenType.BINARY, 245 TokenType.VARBINARY, 246 TokenType.JSON, 247 TokenType.JSONB, 248 TokenType.INTERVAL, 249 TokenType.TINYBLOB, 250 TokenType.TINYTEXT, 251 TokenType.TIME, 252 TokenType.TIMETZ, 253 TokenType.TIMESTAMP, 254 TokenType.TIMESTAMP_S, 255 TokenType.TIMESTAMP_MS, 256 TokenType.TIMESTAMP_NS, 257 TokenType.TIMESTAMPTZ, 258 TokenType.TIMESTAMPLTZ, 259 TokenType.TIMESTAMPNTZ, 260 TokenType.DATETIME, 261 TokenType.DATETIME64, 262 TokenType.DATE, 263 TokenType.DATE32, 264 TokenType.INT4RANGE, 265 TokenType.INT4MULTIRANGE, 266 TokenType.INT8RANGE, 267 TokenType.INT8MULTIRANGE, 268 TokenType.NUMRANGE, 269 TokenType.NUMMULTIRANGE, 270 TokenType.TSRANGE, 271 TokenType.TSMULTIRANGE, 272 TokenType.TSTZRANGE, 273 TokenType.TSTZMULTIRANGE, 274 TokenType.DATERANGE, 275 TokenType.DATEMULTIRANGE, 276 TokenType.DECIMAL, 277 TokenType.UDECIMAL, 278 TokenType.BIGDECIMAL, 279 TokenType.UUID, 280 TokenType.GEOGRAPHY, 281 TokenType.GEOMETRY, 282 TokenType.HLLSKETCH, 283 TokenType.HSTORE, 284 TokenType.PSEUDO_TYPE, 285 TokenType.SUPER, 286 TokenType.SERIAL, 287 TokenType.SMALLSERIAL, 288 TokenType.BIGSERIAL, 289 TokenType.XML, 290 TokenType.YEAR, 291 TokenType.UNIQUEIDENTIFIER, 292 TokenType.USERDEFINED, 293 TokenType.MONEY, 294 TokenType.SMALLMONEY, 295 TokenType.ROWVERSION, 296 TokenType.IMAGE, 297 TokenType.VARIANT, 298 TokenType.OBJECT, 299 TokenType.OBJECT_IDENTIFIER, 300 TokenType.INET, 301 TokenType.IPADDRESS, 302 TokenType.IPPREFIX, 303 TokenType.IPV4, 304 TokenType.IPV6, 305 TokenType.UNKNOWN, 306 TokenType.NULL, 307 TokenType.NAME, 308 TokenType.TDIGEST, 309 *ENUM_TYPE_TOKENS, 310 *NESTED_TYPE_TOKENS, 311 *AGGREGATE_TYPE_TOKENS, 312 } 313 314 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 315 TokenType.BIGINT: TokenType.UBIGINT, 316 TokenType.INT: TokenType.UINT, 317 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 318 TokenType.SMALLINT: TokenType.USMALLINT, 319 TokenType.TINYINT: TokenType.UTINYINT, 320 TokenType.DECIMAL: TokenType.UDECIMAL, 321 } 322 323 SUBQUERY_PREDICATES = { 324 TokenType.ANY: exp.Any, 325 TokenType.ALL: exp.All, 326 TokenType.EXISTS: exp.Exists, 327 TokenType.SOME: exp.Any, 328 } 329 330 RESERVED_TOKENS = { 331 *Tokenizer.SINGLE_TOKENS.values(), 332 TokenType.SELECT, 333 } - {TokenType.IDENTIFIER} 334 335 DB_CREATABLES = { 336 TokenType.DATABASE, 337 TokenType.DICTIONARY, 338 TokenType.MODEL, 339 TokenType.SCHEMA, 340 TokenType.SEQUENCE, 341 TokenType.STORAGE_INTEGRATION, 342 TokenType.TABLE, 343 TokenType.TAG, 344 TokenType.VIEW, 345 TokenType.WAREHOUSE, 346 TokenType.STREAMLIT, 347 } 348 349 CREATABLES = { 350 TokenType.COLUMN, 351 TokenType.CONSTRAINT, 352 TokenType.FOREIGN_KEY, 353 TokenType.FUNCTION, 354 TokenType.INDEX, 355 TokenType.PROCEDURE, 356 *DB_CREATABLES, 357 } 358 359 # Tokens that can represent identifiers 360 ID_VAR_TOKENS = { 361 TokenType.VAR, 362 TokenType.ANTI, 363 TokenType.APPLY, 364 TokenType.ASC, 365 TokenType.ASOF, 366 TokenType.AUTO_INCREMENT, 367 TokenType.BEGIN, 368 TokenType.BPCHAR, 369 TokenType.CACHE, 370 TokenType.CASE, 371 TokenType.COLLATE, 372 TokenType.COMMAND, 373 TokenType.COMMENT, 374 TokenType.COMMIT, 375 TokenType.CONSTRAINT, 376 TokenType.COPY, 377 TokenType.DEFAULT, 378 TokenType.DELETE, 379 TokenType.DESC, 380 TokenType.DESCRIBE, 381 TokenType.DICTIONARY, 382 TokenType.DIV, 383 TokenType.END, 384 TokenType.EXECUTE, 385 TokenType.ESCAPE, 386 TokenType.FALSE, 387 TokenType.FIRST, 388 TokenType.FILTER, 389 TokenType.FINAL, 390 TokenType.FORMAT, 
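    # Illustrative sketch (editorial, top-level sqlglot API): keywords listed in
    # ID_VAR_TOKENS can still serve as plain identifiers, so queries like this
    # parse without quoting:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT format, next FROM t")  # FORMAT/NEXT as column names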
391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 509 TokenType.AND: exp.And, 510 } 511 512 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 513 TokenType.COLON_EQ: exp.PropertyEQ, 514 } 515 516 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 517 TokenType.OR: exp.Or, 518 } 519 520 EQUALITY = { 521 TokenType.EQ: exp.EQ, 522 TokenType.NEQ: exp.NEQ, 523 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 524 } 525 526 COMPARISON = { 527 TokenType.GT: exp.GT, 528 TokenType.GTE: exp.GTE, 529 TokenType.LT: exp.LT, 530 TokenType.LTE: exp.LTE, 531 } 532 533 BITWISE = { 534 TokenType.AMP: exp.BitwiseAnd, 535 TokenType.CARET: exp.BitwiseXor, 536 TokenType.PIPE: exp.BitwiseOr, 537 } 538 539 TERM = { 540 TokenType.DASH: exp.Sub, 541 TokenType.PLUS: exp.Add, 542 TokenType.MOD: exp.Mod, 543 TokenType.COLLATE: exp.Collate, 544 } 545 546 FACTOR = { 547 TokenType.DIV: exp.IntDiv, 548 TokenType.LR_ARROW: exp.Distance, 549 TokenType.SLASH: exp.Div, 550 
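    # The ASSIGNMENT/DISJUNCTION/CONJUNCTION/EQUALITY/COMPARISON/BITWISE/TERM/FACTOR
    # operator maps drive a classic precedence climb: each level parses the next,
    # tighter level as its operands. Illustrative sketch (editorial, top-level API):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("a + b * c")
    #   # FACTOR (Mul) binds tighter than TERM (Add), so the tree is
    #   # Add(this=Column(a), expression=Mul(this=Column(b), expression=Column(c)))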
TokenType.STAR: exp.Mul, 551 } 552 553 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 554 555 TIMES = { 556 TokenType.TIME, 557 TokenType.TIMETZ, 558 } 559 560 TIMESTAMPS = { 561 TokenType.TIMESTAMP, 562 TokenType.TIMESTAMPTZ, 563 TokenType.TIMESTAMPLTZ, 564 *TIMES, 565 } 566 567 SET_OPERATIONS = { 568 TokenType.UNION, 569 TokenType.INTERSECT, 570 TokenType.EXCEPT, 571 } 572 573 JOIN_METHODS = { 574 TokenType.ASOF, 575 TokenType.NATURAL, 576 TokenType.POSITIONAL, 577 } 578 579 JOIN_SIDES = { 580 TokenType.LEFT, 581 TokenType.RIGHT, 582 TokenType.FULL, 583 } 584 585 JOIN_KINDS = { 586 TokenType.INNER, 587 TokenType.OUTER, 588 TokenType.CROSS, 589 TokenType.SEMI, 590 TokenType.ANTI, 591 } 592 593 JOIN_HINTS: t.Set[str] = set() 594 595 LAMBDAS = { 596 TokenType.ARROW: lambda self, expressions: self.expression( 597 exp.Lambda, 598 this=self._replace_lambda( 599 self._parse_assignment(), 600 expressions, 601 ), 602 expressions=expressions, 603 ), 604 TokenType.FARROW: lambda self, expressions: self.expression( 605 exp.Kwarg, 606 this=exp.var(expressions[0].name), 607 expression=self._parse_assignment(), 608 ), 609 } 610 611 COLUMN_OPERATORS = { 612 TokenType.DOT: None, 613 TokenType.DCOLON: lambda self, this, to: self.expression( 614 exp.Cast if self.STRICT_CAST else exp.TryCast, 615 this=this, 616 to=to, 617 ), 618 TokenType.ARROW: lambda self, this, path: self.expression( 619 exp.JSONExtract, 620 this=this, 621 expression=self.dialect.to_json_path(path), 622 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 623 ), 624 TokenType.DARROW: lambda self, this, path: self.expression( 625 exp.JSONExtractScalar, 626 this=this, 627 expression=self.dialect.to_json_path(path), 628 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 629 ), 630 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 631 exp.JSONBExtract, 632 this=this, 633 expression=path, 634 ), 635 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 636 exp.JSONBExtractScalar, 637 this=this, 638 expression=path, 639 ), 640 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 641 exp.JSONBContains, 642 this=this, 643 expression=key, 644 ), 645 } 646 647 EXPRESSION_PARSERS = { 648 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 649 exp.Column: lambda self: self._parse_column(), 650 exp.Condition: lambda self: self._parse_assignment(), 651 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 652 exp.Expression: lambda self: self._parse_expression(), 653 exp.From: lambda self: self._parse_from(joins=True), 654 exp.Group: lambda self: self._parse_group(), 655 exp.Having: lambda self: self._parse_having(), 656 exp.Identifier: lambda self: self._parse_id_var(), 657 exp.Join: lambda self: self._parse_join(), 658 exp.Lambda: lambda self: self._parse_lambda(), 659 exp.Lateral: lambda self: self._parse_lateral(), 660 exp.Limit: lambda self: self._parse_limit(), 661 exp.Offset: lambda self: self._parse_offset(), 662 exp.Order: lambda self: self._parse_order(), 663 exp.Ordered: lambda self: self._parse_ordered(), 664 exp.Properties: lambda self: self._parse_properties(), 665 exp.Qualify: lambda self: self._parse_qualify(), 666 exp.Returning: lambda self: self._parse_returning(), 667 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 668 exp.Table: lambda self: self._parse_table_parts(), 669 exp.TableAlias: lambda self: self._parse_table_alias(), 670 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 671 exp.Where: lambda self: 
self._parse_where(), 672 exp.Window: lambda self: self._parse_named_window(), 673 exp.With: lambda self: self._parse_with(), 674 "JOIN_TYPE": lambda self: self._parse_join_parts(), 675 } 676 677 STATEMENT_PARSERS = { 678 TokenType.ALTER: lambda self: self._parse_alter(), 679 TokenType.BEGIN: lambda self: self._parse_transaction(), 680 TokenType.CACHE: lambda self: self._parse_cache(), 681 TokenType.COMMENT: lambda self: self._parse_comment(), 682 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 683 TokenType.COPY: lambda self: self._parse_copy(), 684 TokenType.CREATE: lambda self: self._parse_create(), 685 TokenType.DELETE: lambda self: self._parse_delete(), 686 TokenType.DESC: lambda self: self._parse_describe(), 687 TokenType.DESCRIBE: lambda self: self._parse_describe(), 688 TokenType.DROP: lambda self: self._parse_drop(), 689 TokenType.INSERT: lambda self: self._parse_insert(), 690 TokenType.KILL: lambda self: self._parse_kill(), 691 TokenType.LOAD: lambda self: self._parse_load(), 692 TokenType.MERGE: lambda self: self._parse_merge(), 693 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 694 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 695 TokenType.REFRESH: lambda self: self._parse_refresh(), 696 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 697 TokenType.SET: lambda self: self._parse_set(), 698 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 699 TokenType.UNCACHE: lambda self: self._parse_uncache(), 700 TokenType.UPDATE: lambda self: self._parse_update(), 701 TokenType.USE: lambda self: self.expression( 702 exp.Use, 703 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 704 this=self._parse_table(schema=False), 705 ), 706 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 707 } 708 709 UNARY_PARSERS = { 710 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 711 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 712 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 713 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 714 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 715 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 716 } 717 718 STRING_PARSERS = { 719 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 720 exp.RawString, this=token.text 721 ), 722 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 723 exp.National, this=token.text 724 ), 725 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 726 TokenType.STRING: lambda self, token: self.expression( 727 exp.Literal, this=token.text, is_string=True 728 ), 729 TokenType.UNICODE_STRING: lambda self, token: self.expression( 730 exp.UnicodeString, 731 this=token.text, 732 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 733 ), 734 } 735 736 NUMERIC_PARSERS = { 737 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 738 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 739 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 740 TokenType.NUMBER: lambda self, token: self.expression( 741 exp.Literal, this=token.text, is_string=False 742 ), 743 } 744 745 PRIMARY_PARSERS = { 746 
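    # PRIMARY_PARSERS merges the STRING_PARSERS and NUMERIC_PARSERS maps (unpacked
    # next) and adds the remaining atoms (NULL, TRUE/FALSE, star, ...). Illustrative
    # sketch of the resulting literals (editorial, top-level API):
    #
    #   import sqlglot
    #   sqlglot.parse_one("'abc'")  # exp.Literal(this='abc', is_string=True)
    #   sqlglot.parse_one("1.5")    # exp.Literal(this='1.5', is_string=False)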
**STRING_PARSERS, 747 **NUMERIC_PARSERS, 748 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 749 TokenType.NULL: lambda self, _: self.expression(exp.Null), 750 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 751 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 752 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 753 TokenType.STAR: lambda self, _: self.expression( 754 exp.Star, 755 **{ 756 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 757 "replace": self._parse_star_op("REPLACE"), 758 "rename": self._parse_star_op("RENAME"), 759 }, 760 ), 761 } 762 763 PLACEHOLDER_PARSERS = { 764 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 765 TokenType.PARAMETER: lambda self: self._parse_parameter(), 766 TokenType.COLON: lambda self: ( 767 self.expression(exp.Placeholder, this=self._prev.text) 768 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 769 else None 770 ), 771 } 772 773 RANGE_PARSERS = { 774 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 775 TokenType.GLOB: binary_range_parser(exp.Glob), 776 TokenType.ILIKE: binary_range_parser(exp.ILike), 777 TokenType.IN: lambda self, this: self._parse_in(this), 778 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 779 TokenType.IS: lambda self, this: self._parse_is(this), 780 TokenType.LIKE: binary_range_parser(exp.Like), 781 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 782 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 783 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 784 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 785 } 786 787 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 788 "ALLOWED_VALUES": lambda self: self.expression( 789 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 790 ), 791 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 792 "AUTO": lambda self: self._parse_auto_property(), 793 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 794 "BACKUP": lambda self: self.expression( 795 exp.BackupProperty, this=self._parse_var(any_token=True) 796 ), 797 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 798 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 799 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 800 "CHECKSUM": lambda self: self._parse_checksum(), 801 "CLUSTER BY": lambda self: self._parse_cluster(), 802 "CLUSTERED": lambda self: self._parse_clustered_by(), 803 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 804 exp.CollateProperty, **kwargs 805 ), 806 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 807 "CONTAINS": lambda self: self._parse_contains_property(), 808 "COPY": lambda self: self._parse_copy_property(), 809 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 810 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 811 "DEFINER": lambda self: self._parse_definer(), 812 "DETERMINISTIC": lambda self: self.expression( 813 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 814 ), 815 "DISTKEY": lambda self: self._parse_distkey(), 816 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 817 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 818 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 819 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 820 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 821 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 822 "FREESPACE": lambda self: self._parse_freespace(), 823 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 824 "HEAP": lambda self: self.expression(exp.HeapProperty), 825 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 826 "IMMUTABLE": lambda self: self.expression( 827 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 828 ), 829 "INHERITS": lambda self: self.expression( 830 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 831 ), 832 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 833 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 834 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 835 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 836 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 837 "LIKE": lambda self: self._parse_create_like(), 838 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 839 "LOCK": lambda self: self._parse_locking(), 840 "LOCKING": lambda self: self._parse_locking(), 841 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 842 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 843 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 844 "MODIFIES": lambda self: self._parse_modifies_property(), 845 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 846 "NO": lambda self: self._parse_no_property(), 847 "ON": lambda self: self._parse_on_property(), 848 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 849 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 850 "PARTITION": lambda self: self._parse_partitioned_of(), 851 "PARTITION BY": lambda self: self._parse_partitioned_by(), 852 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 853 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 854 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 855 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 856 "READS": lambda self: self._parse_reads_property(), 857 "REMOTE": lambda self: self._parse_remote_with_connection(), 858 "RETURNS": lambda self: self._parse_returns(), 859 "STRICT": lambda self: self.expression(exp.StrictProperty), 860 "ROW": lambda self: self._parse_row(), 861 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 862 "SAMPLE": lambda self: self.expression( 863 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 864 ), 865 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 866 "SETTINGS": lambda self: self.expression( 867 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 868 ), 869 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 870 "SORTKEY": lambda self: self._parse_sortkey(), 871 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 872 "STABLE": lambda self: self.expression( 873 exp.StabilityProperty, this=exp.Literal.string("STABLE") 874 ), 875 "STORED": lambda self: self._parse_stored(), 876 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 
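    # PROPERTY_PARSERS is keyed by the property keyword as it is written in DDL; each
    # callback consumes the remainder of that property. Illustrative sketch (editorial;
    # ClickHouse is just one dialect that exercises the ENGINE entry):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ddl = sqlglot.parse_one("CREATE TABLE t (x Int32) ENGINE=Memory", read="clickhouse")
    #   assert ddl.find(exp.EngineProperty) is not None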
877 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 878 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 879 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 880 "TO": lambda self: self._parse_to_table(), 881 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 882 "TRANSFORM": lambda self: self.expression( 883 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 884 ), 885 "TTL": lambda self: self._parse_ttl(), 886 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 887 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 888 "VOLATILE": lambda self: self._parse_volatile_property(), 889 "WITH": lambda self: self._parse_with_property(), 890 } 891 892 CONSTRAINT_PARSERS = { 893 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 894 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 895 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 896 "CHARACTER SET": lambda self: self.expression( 897 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 898 ), 899 "CHECK": lambda self: self.expression( 900 exp.CheckColumnConstraint, 901 this=self._parse_wrapped(self._parse_assignment), 902 enforced=self._match_text_seq("ENFORCED"), 903 ), 904 "COLLATE": lambda self: self.expression( 905 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 906 ), 907 "COMMENT": lambda self: self.expression( 908 exp.CommentColumnConstraint, this=self._parse_string() 909 ), 910 "COMPRESS": lambda self: self._parse_compress(), 911 "CLUSTERED": lambda self: self.expression( 912 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 913 ), 914 "NONCLUSTERED": lambda self: self.expression( 915 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 916 ), 917 "DEFAULT": lambda self: self.expression( 918 exp.DefaultColumnConstraint, this=self._parse_bitwise() 919 ), 920 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 921 "EPHEMERAL": lambda self: self.expression( 922 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 923 ), 924 "EXCLUDE": lambda self: self.expression( 925 exp.ExcludeColumnConstraint, this=self._parse_index_params() 926 ), 927 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 928 "FORMAT": lambda self: self.expression( 929 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 930 ), 931 "GENERATED": lambda self: self._parse_generated_as_identity(), 932 "IDENTITY": lambda self: self._parse_auto_increment(), 933 "INLINE": lambda self: self._parse_inline(), 934 "LIKE": lambda self: self._parse_create_like(), 935 "NOT": lambda self: self._parse_not_constraint(), 936 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 937 "ON": lambda self: ( 938 self._match(TokenType.UPDATE) 939 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 940 ) 941 or self.expression(exp.OnProperty, this=self._parse_id_var()), 942 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 943 "PERIOD": lambda self: self._parse_period_for_system_time(), 944 "PRIMARY KEY": lambda self: self._parse_primary_key(), 945 "REFERENCES": lambda self: self._parse_references(match=False), 946 "TITLE": lambda self: self.expression( 947 exp.TitleColumnConstraint, this=self._parse_var_or_string() 948 ), 949 "TTL": lambda self: 
self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 950 "UNIQUE": lambda self: self._parse_unique(), 951 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 952 "WITH": lambda self: self.expression( 953 exp.Properties, expressions=self._parse_wrapped_properties() 954 ), 955 } 956 957 ALTER_PARSERS = { 958 "ADD": lambda self: self._parse_alter_table_add(), 959 "ALTER": lambda self: self._parse_alter_table_alter(), 960 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 961 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 962 "DROP": lambda self: self._parse_alter_table_drop(), 963 "RENAME": lambda self: self._parse_alter_table_rename(), 964 "SET": lambda self: self._parse_alter_table_set(), 965 } 966 967 ALTER_ALTER_PARSERS = { 968 "DISTKEY": lambda self: self._parse_alter_diststyle(), 969 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 970 "SORTKEY": lambda self: self._parse_alter_sortkey(), 971 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 972 } 973 974 SCHEMA_UNNAMED_CONSTRAINTS = { 975 "CHECK", 976 "EXCLUDE", 977 "FOREIGN KEY", 978 "LIKE", 979 "PERIOD", 980 "PRIMARY KEY", 981 "UNIQUE", 982 } 983 984 NO_PAREN_FUNCTION_PARSERS = { 985 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 986 "CASE": lambda self: self._parse_case(), 987 "IF": lambda self: self._parse_if(), 988 "NEXT": lambda self: self._parse_next_value_for(), 989 } 990 991 INVALID_FUNC_NAME_TOKENS = { 992 TokenType.IDENTIFIER, 993 TokenType.STRING, 994 } 995 996 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 997 998 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 999 1000 FUNCTION_PARSERS = { 1001 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1002 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1003 "DECODE": lambda self: self._parse_decode(), 1004 "EXTRACT": lambda self: self._parse_extract(), 1005 "JSON_OBJECT": lambda self: self._parse_json_object(), 1006 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1007 "JSON_TABLE": lambda self: self._parse_json_table(), 1008 "MATCH": lambda self: self._parse_match_against(), 1009 "OPENJSON": lambda self: self._parse_open_json(), 1010 "POSITION": lambda self: self._parse_position(), 1011 "PREDICT": lambda self: self._parse_predict(), 1012 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1013 "STRING_AGG": lambda self: self._parse_string_agg(), 1014 "SUBSTRING": lambda self: self._parse_substring(), 1015 "TRIM": lambda self: self._parse_trim(), 1016 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1017 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1018 } 1019 1020 QUERY_MODIFIER_PARSERS = { 1021 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1022 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1023 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1024 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1025 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1026 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1027 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1028 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1029 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1030 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1031 TokenType.OFFSET: lambda self: ("offset", 
self._parse_offset()), 1032 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1033 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1034 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1035 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1036 TokenType.CLUSTER_BY: lambda self: ( 1037 "cluster", 1038 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1039 ), 1040 TokenType.DISTRIBUTE_BY: lambda self: ( 1041 "distribute", 1042 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1043 ), 1044 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1045 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1046 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1047 } 1048 1049 SET_PARSERS = { 1050 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1051 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1052 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1053 "TRANSACTION": lambda self: self._parse_set_transaction(), 1054 } 1055 1056 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1057 1058 TYPE_LITERAL_PARSERS = { 1059 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1060 } 1061 1062 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1063 1064 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1065 1066 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1067 1068 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1069 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1070 "ISOLATION": ( 1071 ("LEVEL", "REPEATABLE", "READ"), 1072 ("LEVEL", "READ", "COMMITTED"), 1073 ("LEVEL", "READ", "UNCOMMITTED"), 1074 ("LEVEL", "SERIALIZABLE"), 1075 ), 1076 "READ": ("WRITE", "ONLY"), 1077 } 1078 1079 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1080 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1081 ) 1082 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1083 1084 CREATE_SEQUENCE: OPTIONS_TYPE = { 1085 "SCALE": ("EXTEND", "NOEXTEND"), 1086 "SHARD": ("EXTEND", "NOEXTEND"), 1087 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1088 **dict.fromkeys( 1089 ( 1090 "SESSION", 1091 "GLOBAL", 1092 "KEEP", 1093 "NOKEEP", 1094 "ORDER", 1095 "NOORDER", 1096 "NOCACHE", 1097 "CYCLE", 1098 "NOCYCLE", 1099 "NOMINVALUE", 1100 "NOMAXVALUE", 1101 "NOSCALE", 1102 "NOSHARD", 1103 ), 1104 tuple(), 1105 ), 1106 } 1107 1108 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1109 1110 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1111 1112 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1113 1114 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1115 1116 CLONE_KEYWORDS = {"CLONE", "COPY"} 1117 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1118 1119 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1120 1121 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1122 1123 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1124 1125 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1126 1127 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1128 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1129 WINDOW_SIDES = {"FOLLOWING",
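    # WINDOW_SIDES holds the frame-bound keywords consumed while parsing OVER clauses.
    # Illustrative sketch (editorial, top-level API):
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t"
    #   )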
"PRECEDING"} 1130 1131 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1132 1133 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1134 1135 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1136 1137 DISTINCT_TOKENS = {TokenType.DISTINCT} 1138 1139 NULL_TOKENS = {TokenType.NULL} 1140 1141 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1142 1143 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1144 1145 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1146 1147 STRICT_CAST = True 1148 1149 PREFIXED_PIVOT_COLUMNS = False 1150 IDENTIFY_PIVOT_STRINGS = False 1151 1152 LOG_DEFAULTS_TO_LN = False 1153 1154 # Whether ADD is present for each column added by ALTER TABLE 1155 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1156 1157 # Whether the table sample clause expects CSV syntax 1158 TABLESAMPLE_CSV = False 1159 1160 # The default method used for table sampling 1161 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1162 1163 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1164 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1165 1166 # Whether the TRIM function expects the characters to trim as its first argument 1167 TRIM_PATTERN_FIRST = False 1168 1169 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1170 STRING_ALIASES = False 1171 1172 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1173 MODIFIERS_ATTACHED_TO_UNION = True 1174 UNION_MODIFIERS = {"order", "limit", "offset"} 1175 1176 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1177 NO_PAREN_IF_COMMANDS = True 1178 1179 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1180 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1181 1182 # Whether the `:` operator is used to extract a value from a JSON document 1183 COLON_IS_JSON_EXTRACT = False 1184 1185 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1186 # If this is True and '(' is not found, the keyword will be treated as an identifier 1187 VALUES_FOLLOWED_BY_PAREN = True 1188 1189 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1190 SUPPORTS_IMPLICIT_UNNEST = False 1191 1192 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1193 INTERVAL_SPANS = True 1194 1195 # Whether a PARTITION clause can follow a table reference 1196 SUPPORTS_PARTITION_SELECTION = False 1197 1198 __slots__ = ( 1199 "error_level", 1200 "error_message_context", 1201 "max_errors", 1202 "dialect", 1203 "sql", 1204 "errors", 1205 "_tokens", 1206 "_index", 1207 "_curr", 1208 "_next", 1209 "_prev", 1210 "_prev_comments", 1211 ) 1212 1213 # Autofilled 1214 SHOW_TRIE: t.Dict = {} 1215 SET_TRIE: t.Dict = {} 1216 1217 def __init__( 1218 self, 1219 error_level: t.Optional[ErrorLevel] = None, 1220 error_message_context: int = 100, 1221 max_errors: int = 3, 1222 dialect: DialectType = None, 1223 ): 1224 from sqlglot.dialects import Dialect 1225 1226 self.error_level = error_level or ErrorLevel.IMMEDIATE 1227 self.error_message_context = error_message_context 1228 self.max_errors = max_errors 1229 self.dialect = Dialect.get_or_raise(dialect) 1230 self.reset() 1231 1232 def reset(self): 1233 self.sql = "" 1234 self.errors = [] 1235 self._tokens = [] 1236 self._index = 0 1237 self._curr = None 1238 self._next = None 1239 self._prev = None 1240 self._prev_comments = None 1241 1242 def parse( 1243 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1244 ) -> t.List[t.Optional[exp.Expression]]: 1245 """ 1246 Parses a list of tokens and returns a list of syntax trees, one tree 1247 per parsed SQL statement. 1248 1249 Args: 1250 raw_tokens: The list of tokens. 1251 sql: The original SQL string, used to produce helpful debug messages. 1252 1253 Returns: 1254 The list of the produced syntax trees. 1255 """ 1256 return self._parse( 1257 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1258 ) 1259 1260 def parse_into( 1261 self, 1262 expression_types: exp.IntoType, 1263 raw_tokens: t.List[Token], 1264 sql: t.Optional[str] = None, 1265 ) -> t.List[t.Optional[exp.Expression]]: 1266 """ 1267 Parses a list of tokens into a given Expression type. If a collection of Expression 1268 types is given instead, this method will try to parse the token list into each one 1269 of them, stopping at the first for which the parsing succeeds. 1270 1271 Args: 1272 expression_types: The expression type(s) to try and parse the token list into. 1273 raw_tokens: The list of tokens. 1274 sql: The original SQL string, used to produce helpful debug messages. 1275 1276 Returns: 1277 The target Expression. 
1278 """ 1279 errors = [] 1280 for expression_type in ensure_list(expression_types): 1281 parser = self.EXPRESSION_PARSERS.get(expression_type) 1282 if not parser: 1283 raise TypeError(f"No parser registered for {expression_type}") 1284 1285 try: 1286 return self._parse(parser, raw_tokens, sql) 1287 except ParseError as e: 1288 e.errors[0]["into_expression"] = expression_type 1289 errors.append(e) 1290 1291 raise ParseError( 1292 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1293 errors=merge_errors(errors), 1294 ) from errors[-1] 1295 1296 def _parse( 1297 self, 1298 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1299 raw_tokens: t.List[Token], 1300 sql: t.Optional[str] = None, 1301 ) -> t.List[t.Optional[exp.Expression]]: 1302 self.reset() 1303 self.sql = sql or "" 1304 1305 total = len(raw_tokens) 1306 chunks: t.List[t.List[Token]] = [[]] 1307 1308 for i, token in enumerate(raw_tokens): 1309 if token.token_type == TokenType.SEMICOLON: 1310 if token.comments: 1311 chunks.append([token]) 1312 1313 if i < total - 1: 1314 chunks.append([]) 1315 else: 1316 chunks[-1].append(token) 1317 1318 expressions = [] 1319 1320 for tokens in chunks: 1321 self._index = -1 1322 self._tokens = tokens 1323 self._advance() 1324 1325 expressions.append(parse_method(self)) 1326 1327 if self._index < len(self._tokens): 1328 self.raise_error("Invalid expression / Unexpected token") 1329 1330 self.check_errors() 1331 1332 return expressions 1333 1334 def check_errors(self) -> None: 1335 """Logs or raises any found errors, depending on the chosen error level setting.""" 1336 if self.error_level == ErrorLevel.WARN: 1337 for error in self.errors: 1338 logger.error(str(error)) 1339 elif self.error_level == ErrorLevel.RAISE and self.errors: 1340 raise ParseError( 1341 concat_messages(self.errors, self.max_errors), 1342 errors=merge_errors(self.errors), 1343 ) 1344 1345 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1346 """ 1347 Appends an error in the list of recorded errors or raises it, depending on the chosen 1348 error level setting. 1349 """ 1350 token = token or self._curr or self._prev or Token.string("") 1351 start = token.start 1352 end = token.end + 1 1353 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1354 highlight = self.sql[start:end] 1355 end_context = self.sql[end : end + self.error_message_context] 1356 1357 error = ParseError.new( 1358 f"{message}. Line {token.line}, Col: {token.col}.\n" 1359 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1360 description=message, 1361 line=token.line, 1362 col=token.col, 1363 start_context=start_context, 1364 highlight=highlight, 1365 end_context=end_context, 1366 ) 1367 1368 if self.error_level == ErrorLevel.IMMEDIATE: 1369 raise error 1370 1371 self.errors.append(error) 1372 1373 def expression( 1374 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1375 ) -> E: 1376 """ 1377 Creates a new, validated Expression. 1378 1379 Args: 1380 exp_class: The expression class to instantiate. 1381 comments: An optional list of comments to attach to the expression. 1382 kwargs: The arguments to set for the expression along with their respective values. 1383 1384 Returns: 1385 The target expression. 
1386 """ 1387 instance = exp_class(**kwargs) 1388 instance.add_comments(comments) if comments else self._add_comments(instance) 1389 return self.validate_expression(instance) 1390 1391 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1392 if expression and self._prev_comments: 1393 expression.add_comments(self._prev_comments) 1394 self._prev_comments = None 1395 1396 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1397 """ 1398 Validates an Expression, making sure that all its mandatory arguments are set. 1399 1400 Args: 1401 expression: The expression to validate. 1402 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1403 1404 Returns: 1405 The validated expression. 1406 """ 1407 if self.error_level != ErrorLevel.IGNORE: 1408 for error_message in expression.error_messages(args): 1409 self.raise_error(error_message) 1410 1411 return expression 1412 1413 def _find_sql(self, start: Token, end: Token) -> str: 1414 return self.sql[start.start : end.end + 1] 1415 1416 def _is_connected(self) -> bool: 1417 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1418 1419 def _advance(self, times: int = 1) -> None: 1420 self._index += times 1421 self._curr = seq_get(self._tokens, self._index) 1422 self._next = seq_get(self._tokens, self._index + 1) 1423 1424 if self._index > 0: 1425 self._prev = self._tokens[self._index - 1] 1426 self._prev_comments = self._prev.comments 1427 else: 1428 self._prev = None 1429 self._prev_comments = None 1430 1431 def _retreat(self, index: int) -> None: 1432 if index != self._index: 1433 self._advance(index - self._index) 1434 1435 def _warn_unsupported(self) -> None: 1436 if len(self._tokens) <= 1: 1437 return 1438 1439 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1440 # interested in emitting a warning for the one being currently processed. 1441 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1442 1443 logger.warning( 1444 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1445 ) 1446 1447 def _parse_command(self) -> exp.Command: 1448 self._warn_unsupported() 1449 return self.expression( 1450 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1451 ) 1452 1453 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1454 """ 1455 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1456 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1457 the parser state accordingly 1458 """ 1459 index = self._index 1460 error_level = self.error_level 1461 1462 self.error_level = ErrorLevel.IMMEDIATE 1463 try: 1464 this = parse_method() 1465 except ParseError: 1466 this = None 1467 finally: 1468 if not this or retreat: 1469 self._retreat(index) 1470 self.error_level = error_level 1471 1472 return this 1473 1474 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1475 start = self._prev 1476 exists = self._parse_exists() if allow_exists else None 1477 1478 self._match(TokenType.ON) 1479 1480 materialized = self._match_text_seq("MATERIALIZED") 1481 kind = self._match_set(self.CREATABLES) and self._prev 1482 if not kind: 1483 return self._parse_as_command(start) 1484 1485 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1486 this = self._parse_user_defined_function(kind=kind.token_type) 1487 elif kind.token_type == TokenType.TABLE: 1488 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1489 elif kind.token_type == TokenType.COLUMN: 1490 this = self._parse_column() 1491 else: 1492 this = self._parse_id_var() 1493 1494 self._match(TokenType.IS) 1495 1496 return self.expression( 1497 exp.Comment, 1498 this=this, 1499 kind=kind.text, 1500 expression=self._parse_string(), 1501 exists=exists, 1502 materialized=materialized, 1503 ) 1504 1505 def _parse_to_table( 1506 self, 1507 ) -> exp.ToTableProperty: 1508 table = self._parse_table_parts(schema=True) 1509 return self.expression(exp.ToTableProperty, this=table) 1510 1511 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1512 def _parse_ttl(self) -> exp.Expression: 1513 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1514 this = self._parse_bitwise() 1515 1516 if self._match_text_seq("DELETE"): 1517 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1518 if self._match_text_seq("RECOMPRESS"): 1519 return self.expression( 1520 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1521 ) 1522 if self._match_text_seq("TO", "DISK"): 1523 return self.expression( 1524 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1525 ) 1526 if self._match_text_seq("TO", "VOLUME"): 1527 return self.expression( 1528 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1529 ) 1530 1531 return this 1532 1533 expressions = self._parse_csv(_parse_ttl_action) 1534 where = self._parse_where() 1535 group = self._parse_group() 1536 1537 aggregates = None 1538 if group and self._match(TokenType.SET): 1539 aggregates = self._parse_csv(self._parse_set_item) 1540 1541 return self.expression( 1542 exp.MergeTreeTTL, 1543 expressions=expressions, 1544 where=where, 1545 group=group, 1546 aggregates=aggregates, 1547 ) 1548 1549 def _parse_statement(self) -> t.Optional[exp.Expression]: 1550 if self._curr is None: 1551 return None 1552 1553 if self._match_set(self.STATEMENT_PARSERS): 1554 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1555 1556 if self._match_set(self.dialect.tokenizer.COMMANDS): 1557 return self._parse_command() 1558 1559 expression = self._parse_expression() 1560 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1561 return self._parse_query_modifiers(expression) 1562 1563 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1564 start = 
self._prev 1565 temporary = self._match(TokenType.TEMPORARY) 1566 materialized = self._match_text_seq("MATERIALIZED") 1567 1568 kind = self._match_set(self.CREATABLES) and self._prev.text 1569 if not kind: 1570 return self._parse_as_command(start) 1571 1572 if_exists = exists or self._parse_exists() 1573 table = self._parse_table_parts( 1574 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1575 ) 1576 1577 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1578 1579 if self._match(TokenType.L_PAREN, advance=False): 1580 expressions = self._parse_wrapped_csv(self._parse_types) 1581 else: 1582 expressions = None 1583 1584 return self.expression( 1585 exp.Drop, 1586 comments=start.comments, 1587 exists=if_exists, 1588 this=table, 1589 expressions=expressions, 1590 kind=kind.upper(), 1591 temporary=temporary, 1592 materialized=materialized, 1593 cascade=self._match_text_seq("CASCADE"), 1594 constraints=self._match_text_seq("CONSTRAINTS"), 1595 purge=self._match_text_seq("PURGE"), 1596 cluster=cluster, 1597 ) 1598 1599 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1600 return ( 1601 self._match_text_seq("IF") 1602 and (not not_ or self._match(TokenType.NOT)) 1603 and self._match(TokenType.EXISTS) 1604 ) 1605 1606 def _parse_create(self) -> exp.Create | exp.Command: 1607 # Note: this can't be None because we've matched a statement parser 1608 start = self._prev 1609 comments = self._prev_comments 1610 1611 replace = ( 1612 start.token_type == TokenType.REPLACE 1613 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1614 or self._match_pair(TokenType.OR, TokenType.ALTER) 1615 ) 1616 1617 unique = self._match(TokenType.UNIQUE) 1618 1619 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1620 self._advance() 1621 1622 properties = None 1623 create_token = self._match_set(self.CREATABLES) and self._prev 1624 1625 if not create_token: 1626 # exp.Properties.Location.POST_CREATE 1627 properties = self._parse_properties() 1628 create_token = self._match_set(self.CREATABLES) and self._prev 1629 1630 if not properties or not create_token: 1631 return self._parse_as_command(start) 1632 1633 exists = self._parse_exists(not_=True) 1634 this = None 1635 expression: t.Optional[exp.Expression] = None 1636 indexes = None 1637 no_schema_binding = None 1638 begin = None 1639 end = None 1640 clone = None 1641 1642 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1643 nonlocal properties 1644 if properties and temp_props: 1645 properties.expressions.extend(temp_props.expressions) 1646 elif temp_props: 1647 properties = temp_props 1648 1649 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1650 this = self._parse_user_defined_function(kind=create_token.token_type) 1651 1652 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1653 extend_props(self._parse_properties()) 1654 1655 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1656 extend_props(self._parse_properties()) 1657 1658 if not expression: 1659 if self._match(TokenType.COMMAND): 1660 expression = self._parse_as_command(self._prev) 1661 else: 1662 begin = self._match(TokenType.BEGIN) 1663 return_ = self._match_text_seq("RETURN") 1664 1665 if self._match(TokenType.STRING, advance=False): 1666 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1667 # # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1668 expression = self._parse_string() 1669 extend_props(self._parse_properties()) 1670 else: 1671 expression = self._parse_statement() 1672 1673 end = self._match_text_seq("END") 1674 1675 if return_: 1676 expression = self.expression(exp.Return, this=expression) 1677 elif create_token.token_type == TokenType.INDEX: 1678 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1679 if not self._match(TokenType.ON): 1680 index = self._parse_id_var() 1681 anonymous = False 1682 else: 1683 index = None 1684 anonymous = True 1685 1686 this = self._parse_index(index=index, anonymous=anonymous) 1687 elif create_token.token_type in self.DB_CREATABLES: 1688 table_parts = self._parse_table_parts( 1689 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1690 ) 1691 1692 # exp.Properties.Location.POST_NAME 1693 self._match(TokenType.COMMA) 1694 extend_props(self._parse_properties(before=True)) 1695 1696 this = self._parse_schema(this=table_parts) 1697 1698 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1699 extend_props(self._parse_properties()) 1700 1701 self._match(TokenType.ALIAS) 1702 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1703 # exp.Properties.Location.POST_ALIAS 1704 extend_props(self._parse_properties()) 1705 1706 if create_token.token_type == TokenType.SEQUENCE: 1707 expression = self._parse_types() 1708 extend_props(self._parse_properties()) 1709 else: 1710 expression = self._parse_ddl_select() 1711 1712 if create_token.token_type == TokenType.TABLE: 1713 # exp.Properties.Location.POST_EXPRESSION 1714 extend_props(self._parse_properties()) 1715 1716 indexes = [] 1717 while True: 1718 index = self._parse_index() 1719 1720 # exp.Properties.Location.POST_INDEX 1721 extend_props(self._parse_properties()) 1722 1723 if not index: 1724 break 1725 else: 1726 self._match(TokenType.COMMA) 1727 indexes.append(index) 1728 elif create_token.token_type == TokenType.VIEW: 1729 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1730 no_schema_binding = True 1731 1732 shallow = self._match_text_seq("SHALLOW") 1733 1734 if self._match_texts(self.CLONE_KEYWORDS): 1735 copy = self._prev.text.lower() == "copy" 1736 clone = self.expression( 1737 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1738 ) 1739 1740 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1741 return self._parse_as_command(start) 1742 1743 return self.expression( 1744 exp.Create, 1745 comments=comments, 1746 this=this, 1747 kind=create_token.text.upper(), 1748 replace=replace, 1749 unique=unique, 1750 expression=expression, 1751 exists=exists, 1752 properties=properties, 1753 indexes=indexes, 1754 no_schema_binding=no_schema_binding, 1755 begin=begin, 1756 end=end, 1757 clone=clone, 1758 ) 1759 1760 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1761 seq = exp.SequenceProperties() 1762 1763 options = [] 1764 index = self._index 1765 1766 while self._curr: 1767 self._match(TokenType.COMMA) 1768 if self._match_text_seq("INCREMENT"): 1769 self._match_text_seq("BY") 1770 self._match_text_seq("=") 1771 seq.set("increment", self._parse_term()) 1772 elif self._match_text_seq("MINVALUE"): 1773 seq.set("minvalue", self._parse_term()) 1774 elif self._match_text_seq("MAXVALUE"): 1775 seq.set("maxvalue", self._parse_term()) 1776 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1777 self._match_text_seq("=") 1778 seq.set("start", self._parse_term()) 1779 elif self._match_text_seq("CACHE"): 1780 # T-SQL allows empty CACHE which is initialized dynamically 1781 seq.set("cache", self._parse_number() or True) 1782 elif self._match_text_seq("OWNED", "BY"): 1783 # "OWNED BY NONE" is the default 1784 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1785 else: 1786 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1787 if opt: 1788 options.append(opt) 1789 else: 1790 break 1791 1792 seq.set("options", options if options else None) 1793 return None if self._index == index else seq 1794 1795 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1796 # only used for teradata currently 1797 self._match(TokenType.COMMA) 1798 1799 kwargs = { 1800 "no": self._match_text_seq("NO"), 1801 "dual": self._match_text_seq("DUAL"), 1802 "before": self._match_text_seq("BEFORE"), 1803 "default": self._match_text_seq("DEFAULT"), 1804 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1805 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1806 "after": self._match_text_seq("AFTER"), 1807 "minimum": self._match_texts(("MIN", "MINIMUM")), 1808 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1809 } 1810 1811 if self._match_texts(self.PROPERTY_PARSERS): 1812 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1813 try: 1814 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1815 except TypeError: 1816 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1817 1818 return None 1819 1820 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1821 return self._parse_wrapped_csv(self._parse_property) 1822 1823 def _parse_property(self) -> t.Optional[exp.Expression]: 1824 if self._match_texts(self.PROPERTY_PARSERS): 1825 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1826 1827 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1828 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1829 1830 if self._match_text_seq("COMPOUND", "SORTKEY"): 1831 return self._parse_sortkey(compound=True) 1832 1833 if self._match_text_seq("SQL", "SECURITY"): 1834 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1835 1836 index = self._index 1837 key = self._parse_column() 1838 1839 if not self._match(TokenType.EQ): 1840 self._retreat(index) 1841 return self._parse_sequence_properties() 1842 1843 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1844 if isinstance(key, exp.Column): 1845 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1846 1847 value = self._parse_bitwise() or self._parse_var(any_token=True) 1848 1849 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1850 if isinstance(value, exp.Column): 1851 value = exp.var(value.name) 1852 1853 return self.expression(exp.Property, this=key, value=value) 1854 1855 def _parse_stored(self) -> exp.FileFormatProperty: 1856 self._match(TokenType.ALIAS) 1857 1858 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1859 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1860 1861 return self.expression( 1862 exp.FileFormatProperty, 1863 this=( 1864 self.expression( 1865 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1866 ) 1867 if 
input_format or output_format 1868 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1869 ), 1870 ) 1871 1872 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1873 field = self._parse_field() 1874 if isinstance(field, exp.Identifier) and not field.quoted: 1875 field = exp.var(field) 1876 1877 return field 1878 1879 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1880 self._match(TokenType.EQ) 1881 self._match(TokenType.ALIAS) 1882 1883 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1884 1885 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1886 properties = [] 1887 while True: 1888 if before: 1889 prop = self._parse_property_before() 1890 else: 1891 prop = self._parse_property() 1892 if not prop: 1893 break 1894 for p in ensure_list(prop): 1895 properties.append(p) 1896 1897 if properties: 1898 return self.expression(exp.Properties, expressions=properties) 1899 1900 return None 1901 1902 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1903 return self.expression( 1904 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1905 ) 1906 1907 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1908 if self._index >= 2: 1909 pre_volatile_token = self._tokens[self._index - 2] 1910 else: 1911 pre_volatile_token = None 1912 1913 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1914 return exp.VolatileProperty() 1915 1916 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1917 1918 def _parse_retention_period(self) -> exp.Var: 1919 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1920 number = self._parse_number() 1921 number_str = f"{number} " if number else "" 1922 unit = self._parse_var(any_token=True) 1923 return exp.var(f"{number_str}{unit}") 1924 1925 def _parse_system_versioning_property( 1926 self, with_: bool = False 1927 ) -> exp.WithSystemVersioningProperty: 1928 self._match(TokenType.EQ) 1929 prop = self.expression( 1930 exp.WithSystemVersioningProperty, 1931 **{ # type: ignore 1932 "on": True, 1933 "with": with_, 1934 }, 1935 ) 1936 1937 if self._match_text_seq("OFF"): 1938 prop.set("on", False) 1939 return prop 1940 1941 self._match(TokenType.ON) 1942 if self._match(TokenType.L_PAREN): 1943 while self._curr and not self._match(TokenType.R_PAREN): 1944 if self._match_text_seq("HISTORY_TABLE", "="): 1945 prop.set("this", self._parse_table_parts()) 1946 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1947 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1948 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1949 prop.set("retention_period", self._parse_retention_period()) 1950 1951 self._match(TokenType.COMMA) 1952 1953 return prop 1954 1955 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1956 self._match(TokenType.EQ) 1957 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1958 prop = self.expression(exp.DataDeletionProperty, on=on) 1959 1960 if self._match(TokenType.L_PAREN): 1961 while self._curr and not self._match(TokenType.R_PAREN): 1962 if self._match_text_seq("FILTER_COLUMN", "="): 1963 prop.set("filter_column", self._parse_column()) 1964 elif self._match_text_seq("RETENTION_PERIOD", "="): 1965 prop.set("retention_period", self._parse_retention_period()) 1966 1967 
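            # Consume the separating comma so wrapped option lists such as
            # (FILTER_COLUMN = c, RETENTION_PERIOD = 30 DAYS) keep the loop advancing.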

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )
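
    # Illustrative usage (not part of the original source). Assuming MySQL maps
    # the DEFINER keyword to _parse_definer above, the user@host pair is folded
    # into a single string on exp.DefinerProperty:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> view = sqlglot.parse_one(
    #   ...     "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
    #   ... )
    #   >>> view.find(exp.DefinerProperty).this  # expected: 'admin@localhost'
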
    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
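
    # Illustrative usage (not part of the original source). Assuming Postgres
    # reaches _parse_partitioned_of via the OF keyword, a partition child table
    # should carry an exp.PartitionedOfProperty wrapping an exp.PartitionBoundSpec:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ddl = sqlglot.parse_one(
    #   ...     "CREATE TABLE m1 PARTITION OF m FOR VALUES FROM (1) TO (10)",
    #   ...     read="postgres",
    #   ... )
    #   >>> ddl.find(exp.PartitionBoundSpec) is not None  # expected: True
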
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2295 return exp.OnCommitProperty(delete=True) 2296 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2297 2298 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2299 if self._match_text_seq("SQL", "DATA"): 2300 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2301 return None 2302 2303 def _parse_distkey(self) -> exp.DistKeyProperty: 2304 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2305 2306 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2307 table = self._parse_table(schema=True) 2308 2309 options = [] 2310 while self._match_texts(("INCLUDING", "EXCLUDING")): 2311 this = self._prev.text.upper() 2312 2313 id_var = self._parse_id_var() 2314 if not id_var: 2315 return None 2316 2317 options.append( 2318 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2319 ) 2320 2321 return self.expression(exp.LikeProperty, this=table, expressions=options) 2322 2323 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2324 return self.expression( 2325 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2326 ) 2327 2328 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2329 self._match(TokenType.EQ) 2330 return self.expression( 2331 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2332 ) 2333 2334 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2335 self._match_text_seq("WITH", "CONNECTION") 2336 return self.expression( 2337 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2338 ) 2339 2340 def _parse_returns(self) -> exp.ReturnsProperty: 2341 value: t.Optional[exp.Expression] 2342 null = None 2343 is_table = self._match(TokenType.TABLE) 2344 2345 if is_table: 2346 if self._match(TokenType.LT): 2347 value = self.expression( 2348 exp.Schema, 2349 this="TABLE", 2350 expressions=self._parse_csv(self._parse_struct_types), 2351 ) 2352 if not self._match(TokenType.GT): 2353 self.raise_error("Expecting >") 2354 else: 2355 value = self._parse_schema(exp.var("TABLE")) 2356 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2357 null = True 2358 value = None 2359 else: 2360 value = self._parse_types() 2361 2362 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2363 2364 def _parse_describe(self) -> exp.Describe: 2365 kind = self._match_set(self.CREATABLES) and self._prev.text 2366 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2367 if self._match(TokenType.DOT): 2368 style = None 2369 self._retreat(self._index - 2) 2370 this = self._parse_table(schema=True) 2371 properties = self._parse_properties() 2372 expressions = properties.expressions if properties else None 2373 return self.expression( 2374 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2375 ) 2376 2377 def _parse_insert(self) -> exp.Insert: 2378 comments = ensure_list(self._prev_comments) 2379 hint = self._parse_hint() 2380 overwrite = self._match(TokenType.OVERWRITE) 2381 ignore = self._match(TokenType.IGNORE) 2382 local = self._match_text_seq("LOCAL") 2383 alternative = None 2384 is_function = None 2385 2386 if self._match_text_seq("DIRECTORY"): 2387 this: t.Optional[exp.Expression] = self.expression( 2388 exp.Directory, 2389 this=self._parse_var_or_string(), 2390 
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
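
    # Illustrative usage (not part of the original source). Assuming Postgres
    # input, _parse_on_conflict attaches the conflict clause to the Insert node:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ins = sqlglot.parse_one(
    #   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
    #   ...     read="postgres",
    #   ... )
    #   >>> isinstance(ins.args.get("conflict"), exp.OnConflict)  # expected: True
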
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
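
    # Illustrative usage (not part of the original source). The multiple-table
    # branch of _parse_delete fires when the statement does not begin with FROM;
    # assuming MySQL input:
    #
    #   >>> import sqlglot
    #   >>> stmt = sqlglot.parse_one("DELETE a FROM a JOIN b ON a.id = b.id", read="mysql")
    #   >>> [tbl.name for tbl in stmt.args["tables"]]  # expected: ['a']
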
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
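
    # Illustrative usage (not part of the original source): _parse_with collects
    # comma-separated CTEs, and _parse_select attaches them to the query's "with"
    # arg. In the default dialect:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   >>> [cte.alias for cte in q.args["with"].expressions]  # expected: ['x']
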
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
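
    # Illustrative usage (not part of the original source): the method/side/kind
    # tokens matched by _parse_join_parts end up as plain strings on exp.Join:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #   >>> q.args["joins"][0].side  # expected: 'LEFT'
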
    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
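
    # Illustrative usage (not part of the original source). Assuming T-SQL input,
    # the WITH (...) branch of _parse_table_hints yields an exp.WithTableHint:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> q = sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")
    #   >>> q.find(exp.WithTableHint) is not None  # expected: True
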
    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
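
    # Illustrative usage (not part of the original source): each DOT matched in
    # _parse_table_parts shifts the previously parsed parts into db and catalog:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   >>> tbl.catalog, tbl.db, tbl.name  # expected: ('c', 'd', 't')
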
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
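
    # Illustrative usage (not part of the original source). Assuming BigQuery,
    # whose dialect sets UNNEST_COLUMN_ONLY, the alias parsed by _parse_unnest
    # is moved into the column list:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> q = sqlglot.parse_one("SELECT x FROM UNNEST([1, 2, 3]) AS x", read="bigquery")
    #   >>> [col.name for col in q.find(exp.Unnest).args["alias"].columns]  # expected: ['x']
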
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_assignment()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
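
    # Illustrative usage (not part of the original source): _parse_group stores
    # each parenthesized grouping set as an exp.Tuple under "grouping_sets":
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (a, b))")
    #   >>> len(q.args["group"].args["grouping_sets"])  # expected: 2
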
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
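
    # Illustrative usage (not part of the original source). Assuming the Oracle
    # dialect, note how _parse_connect registers a PRIOR parser only for the
    # duration of the CONNECT BY condition:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> q = sqlglot.parse_one(
    #   ...     "SELECT id FROM t START WITH id = 1 CONNECT BY PRIOR id = parent_id",
    #   ...     read="oracle",
    #   ... )
    #   >>> q.args["connect"].find(exp.Prior) is not None  # expected: True
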
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
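    # Illustrative sketch (not part of the module): _parse_limit rejects a FETCH
    # clause that combines ONLY and WITH TIES, surfacing the error above:
    #
    #     >>> import sqlglot
    #     >>> try:
    #     ...     sqlglot.parse_one("SELECT * FROM t FETCH FIRST 5 ROWS ONLY WITH TIES")
    #     ... except sqlglot.ParseError as e:
    #     ...     print("Cannot specify both ONLY and WITH TIES" in str(e))
    #     True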
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
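    # Illustrative sketch (not part of the module): the methods above form a
    # recursive-descent precedence chain (assignment -> OR -> AND -> equality ->
    # comparison -> range -> bitwise -> term -> factor -> unary), so AND binds
    # tighter than OR without an explicit precedence table:
    #
    #     >>> from sqlglot import condition
    #     >>> type(condition("a OR b AND c")).__name__
    #     'Or'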
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
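    # Illustrative sketch (not part of the module): per the comment in
    # _parse_interval, intervals are canonicalized into the quoted
    # INTERVAL '<value>' <UNIT> form, assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT INTERVAL 5 day").sql()
    #     "SELECT INTERVAL '5' DAY"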
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTER.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = (
            self._parse_type(parse_interval=False, fallback_to_identifier=True)
            or self._parse_id_var()
        )
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index
            path = self._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
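    # Illustrative sketch (not part of the module): _parse_types powers both
    # parenthesized parameters such as DECIMAL(38, 0) and nested forms such as
    # ARRAY<INT>; the same shapes can be built directly via exp.DataType:
    #
    #     >>> from sqlglot import exp
    #     >>> exp.DataType.build("ARRAY<INT>").sql()
    #     'ARRAY<INT>'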
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # BigQuery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
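    # Illustrative sketch (not part of the module): _parse_primary folds adjacent
    # string literals into a single Concat node; with the default dialect this
    # renders roughly as:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 'a' 'b'").sql()
    #     "SELECT CONCAT('a', 'b')"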
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
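    # Illustrative sketch (not part of the module): the ODBC-style {fn ...}
    # escape handled in _parse_function is unwrapped while parsing, so the braces
    # are dropped on the way out (a rough sketch, default dialect):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {fn UCASE('x')}").sql()
    #     "SELECT UCASE('x')"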
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
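    # Illustrative sketch (not part of the module): _parse_lambda first tries a
    # lambda head such as (x, y) -> ... and retreats when none is found. With the
    # Spark reader, assuming stock sqlglot behavior:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT FILTER(xs, x -> x > 0)", read="spark").sql(dialect="spark")
    #     'SELECT FILTER(xs, x -> x > 0)'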
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
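    # Illustrative sketch (not part of the module): CASE round-trips through the
    # exp.Case node built in _parse_case, assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").sql()
    #     "SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END"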
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
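    # Illustrative sketch (not part of the module): because STRING_AGG is parsed
    # into GroupConcat as described above, it can be re-rendered for dialects that
    # spell it GROUP_CONCAT (a rough sketch; exact output may vary by version):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT STRING_AGG(x, ',')").sql(dialect="mysql")
    #     "SELECT GROUP_CONCAT(x SEPARATOR ',')"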
    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
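    # Illustrative sketch (not part of the module): the search/result variant of
    # DECODE documented in _parse_decode becomes a CASE expression. With the
    # Oracle reader, assuming stock sqlglot behavior:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')", read="oracle").sql()
    #     "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END"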
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
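    # Illustrative sketch (not part of the module): MATCH ... AGAINST and the mode
    # modifiers parsed in _parse_match_against round-trip under the MySQL dialect:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one(
    #     ...     "SELECT MATCH(a, b) AGAINST('q' IN BOOLEAN MODE) FROM t", read="mysql"
    #     ... ).sql(dialect="mysql")
    #     "SELECT MATCH(a, b) AGAINST('q' IN BOOLEAN MODE) FROM t"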
self._parse_string() 5551 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5552 5553 return self.expression( 5554 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5555 ) 5556 5557 expressions = None 5558 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5559 self._match_l_paren() 5560 expressions = self._parse_csv(_parse_open_json_column_def) 5561 5562 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5563 5564 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5565 args = self._parse_csv(self._parse_bitwise) 5566 5567 if self._match(TokenType.IN): 5568 return self.expression( 5569 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5570 ) 5571 5572 if haystack_first: 5573 haystack = seq_get(args, 0) 5574 needle = seq_get(args, 1) 5575 else: 5576 needle = seq_get(args, 0) 5577 haystack = seq_get(args, 1) 5578 5579 return self.expression( 5580 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5581 ) 5582 5583 def _parse_predict(self) -> exp.Predict: 5584 self._match_text_seq("MODEL") 5585 this = self._parse_table() 5586 5587 self._match(TokenType.COMMA) 5588 self._match_text_seq("TABLE") 5589 5590 return self.expression( 5591 exp.Predict, 5592 this=this, 5593 expression=self._parse_table(), 5594 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5595 ) 5596 5597 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5598 args = self._parse_csv(self._parse_table) 5599 return exp.JoinHint(this=func_name.upper(), expressions=args) 5600 5601 def _parse_substring(self) -> exp.Substring: 5602 # Postgres supports the form: substring(string [from int] [for int]) 5603 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5604 5605 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5606 5607 if self._match(TokenType.FROM): 5608 args.append(self._parse_bitwise()) 5609 if self._match(TokenType.FOR): 5610 if len(args) == 1: 5611 args.append(exp.Literal.number(1)) 5612 args.append(self._parse_bitwise()) 5613 5614 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5615 5616 def _parse_trim(self) -> exp.Trim: 5617 # https://www.w3resource.com/sql/character-functions/trim.php 5618 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5619 5620 position = None 5621 collation = None 5622 expression = None 5623 5624 if self._match_texts(self.TRIM_TYPES): 5625 position = self._prev.text.upper() 5626 5627 this = self._parse_bitwise() 5628 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5629 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5630 expression = self._parse_bitwise() 5631 5632 if invert_order: 5633 this, expression = expression, this 5634 5635 if self._match(TokenType.COLLATE): 5636 collation = self._parse_bitwise() 5637 5638 return self.expression( 5639 exp.Trim, this=this, position=position, expression=expression, collation=collation 5640 ) 5641 5642 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5643 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5644 5645 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5646 return self._parse_window(self._parse_id_var(), alias=True) 5647 5648 def _parse_respect_or_ignore_nulls( 5649 self, this: t.Optional[exp.Expression] 5650 ) -> t.Optional[exp.Expression]: 5651 if self._match_text_seq("IGNORE", "NULLS"): 
5652 return self.expression(exp.IgnoreNulls, this=this) 5653 if self._match_text_seq("RESPECT", "NULLS"): 5654 return self.expression(exp.RespectNulls, this=this) 5655 return this 5656 5657 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5658 if self._match(TokenType.HAVING): 5659 self._match_texts(("MAX", "MIN")) 5660 max = self._prev.text.upper() != "MIN" 5661 return self.expression( 5662 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5663 ) 5664 5665 return this 5666 5667 def _parse_window( 5668 self, this: t.Optional[exp.Expression], alias: bool = False 5669 ) -> t.Optional[exp.Expression]: 5670 func = this 5671 comments = func.comments if isinstance(func, exp.Expression) else None 5672 5673 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5674 self._match(TokenType.WHERE) 5675 this = self.expression( 5676 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5677 ) 5678 self._match_r_paren() 5679 5680 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5681 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5682 if self._match_text_seq("WITHIN", "GROUP"): 5683 order = self._parse_wrapped(self._parse_order) 5684 this = self.expression(exp.WithinGroup, this=this, expression=order) 5685 5686 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5687 # Some dialects choose to implement and some do not. 5688 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5689 5690 # There is some code above in _parse_lambda that handles 5691 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5692 5693 # The below changes handle 5694 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5695 5696 # Oracle allows both formats 5697 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5698 # and Snowflake chose to do the same for familiarity 5699 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5700 if isinstance(this, exp.AggFunc): 5701 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5702 5703 if ignore_respect and ignore_respect is not this: 5704 ignore_respect.replace(ignore_respect.this) 5705 this = self.expression(ignore_respect.__class__, this=this) 5706 5707 this = self._parse_respect_or_ignore_nulls(this) 5708 5709 # bigquery select from window x AS (partition by ...) 
5710 if alias: 5711 over = None 5712 self._match(TokenType.ALIAS) 5713 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5714 return this 5715 else: 5716 over = self._prev.text.upper() 5717 5718 if comments and isinstance(func, exp.Expression): 5719 func.pop_comments() 5720 5721 if not self._match(TokenType.L_PAREN): 5722 return self.expression( 5723 exp.Window, 5724 comments=comments, 5725 this=this, 5726 alias=self._parse_id_var(False), 5727 over=over, 5728 ) 5729 5730 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5731 5732 first = self._match(TokenType.FIRST) 5733 if self._match_text_seq("LAST"): 5734 first = False 5735 5736 partition, order = self._parse_partition_and_order() 5737 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5738 5739 if kind: 5740 self._match(TokenType.BETWEEN) 5741 start = self._parse_window_spec() 5742 self._match(TokenType.AND) 5743 end = self._parse_window_spec() 5744 5745 spec = self.expression( 5746 exp.WindowSpec, 5747 kind=kind, 5748 start=start["value"], 5749 start_side=start["side"], 5750 end=end["value"], 5751 end_side=end["side"], 5752 ) 5753 else: 5754 spec = None 5755 5756 self._match_r_paren() 5757 5758 window = self.expression( 5759 exp.Window, 5760 comments=comments, 5761 this=this, 5762 partition_by=partition, 5763 order=order, 5764 spec=spec, 5765 alias=window_alias, 5766 over=over, 5767 first=first, 5768 ) 5769 5770 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5771 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5772 return self._parse_window(window, alias=alias) 5773 5774 return window 5775 5776 def _parse_partition_and_order( 5777 self, 5778 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5779 return self._parse_partition_by(), self._parse_order() 5780 5781 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5782 self._match(TokenType.BETWEEN) 5783 5784 return { 5785 "value": ( 5786 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5787 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5788 or self._parse_bitwise() 5789 ), 5790 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5791 } 5792 5793 def _parse_alias( 5794 self, this: t.Optional[exp.Expression], explicit: bool = False 5795 ) -> t.Optional[exp.Expression]: 5796 any_token = self._match(TokenType.ALIAS) 5797 comments = self._prev_comments or [] 5798 5799 if explicit and not any_token: 5800 return this 5801 5802 if self._match(TokenType.L_PAREN): 5803 aliases = self.expression( 5804 exp.Aliases, 5805 comments=comments, 5806 this=this, 5807 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5808 ) 5809 self._match_r_paren(aliases) 5810 return aliases 5811 5812 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5813 self.STRING_ALIASES and self._parse_string_as_identifier() 5814 ) 5815 5816 if alias: 5817 comments.extend(alias.pop_comments()) 5818 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5819 column = this.this 5820 5821 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5822 if not this.comments and column and column.comments: 5823 this.comments = column.pop_comments() 5824 5825 return this 5826 5827 def _parse_id_var( 5828 self, 5829 any_token: bool = True, 5830 tokens: t.Optional[t.Collection[TokenType]] = None, 5831 ) -> t.Optional[exp.Expression]: 5832 expression = self._parse_identifier() 5833 if 
not expression and ( 5834 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5835 ): 5836 quoted = self._prev.token_type == TokenType.STRING 5837 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5838 5839 return expression 5840 5841 def _parse_string(self) -> t.Optional[exp.Expression]: 5842 if self._match_set(self.STRING_PARSERS): 5843 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5844 return self._parse_placeholder() 5845 5846 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5847 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5848 5849 def _parse_number(self) -> t.Optional[exp.Expression]: 5850 if self._match_set(self.NUMERIC_PARSERS): 5851 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5852 return self._parse_placeholder() 5853 5854 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5855 if self._match(TokenType.IDENTIFIER): 5856 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5857 return self._parse_placeholder() 5858 5859 def _parse_var( 5860 self, 5861 any_token: bool = False, 5862 tokens: t.Optional[t.Collection[TokenType]] = None, 5863 upper: bool = False, 5864 ) -> t.Optional[exp.Expression]: 5865 if ( 5866 (any_token and self._advance_any()) 5867 or self._match(TokenType.VAR) 5868 or (self._match_set(tokens) if tokens else False) 5869 ): 5870 return self.expression( 5871 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5872 ) 5873 return self._parse_placeholder() 5874 5875 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5876 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5877 self._advance() 5878 return self._prev 5879 return None 5880 5881 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5882 return self._parse_var() or self._parse_string() 5883 5884 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5885 return self._parse_primary() or self._parse_var(any_token=True) 5886 5887 def _parse_null(self) -> t.Optional[exp.Expression]: 5888 if self._match_set(self.NULL_TOKENS): 5889 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5890 return self._parse_placeholder() 5891 5892 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5893 if self._match(TokenType.TRUE): 5894 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5895 if self._match(TokenType.FALSE): 5896 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5897 return self._parse_placeholder() 5898 5899 def _parse_star(self) -> t.Optional[exp.Expression]: 5900 if self._match(TokenType.STAR): 5901 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5902 return self._parse_placeholder() 5903 5904 def _parse_parameter(self) -> exp.Parameter: 5905 this = self._parse_identifier() or self._parse_primary_or_var() 5906 return self.expression(exp.Parameter, this=this) 5907 5908 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5909 if self._match_set(self.PLACEHOLDER_PARSERS): 5910 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5911 if placeholder: 5912 return placeholder 5913 self._advance(-1) 5914 return None 5915 5916 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5917 if not self._match_texts(keywords): 5918 return None 5919 if self._match(TokenType.L_PAREN, advance=False): 5920 return 
self._parse_wrapped_csv(self._parse_expression) 5921 5922 expression = self._parse_expression() 5923 return [expression] if expression else None 5924 5925 def _parse_csv( 5926 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5927 ) -> t.List[exp.Expression]: 5928 parse_result = parse_method() 5929 items = [parse_result] if parse_result is not None else [] 5930 5931 while self._match(sep): 5932 self._add_comments(parse_result) 5933 parse_result = parse_method() 5934 if parse_result is not None: 5935 items.append(parse_result) 5936 5937 return items 5938 5939 def _parse_tokens( 5940 self, parse_method: t.Callable, expressions: t.Dict 5941 ) -> t.Optional[exp.Expression]: 5942 this = parse_method() 5943 5944 while self._match_set(expressions): 5945 this = self.expression( 5946 expressions[self._prev.token_type], 5947 this=this, 5948 comments=self._prev_comments, 5949 expression=parse_method(), 5950 ) 5951 5952 return this 5953 5954 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5955 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5956 5957 def _parse_wrapped_csv( 5958 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5959 ) -> t.List[exp.Expression]: 5960 return self._parse_wrapped( 5961 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5962 ) 5963 5964 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5965 wrapped = self._match(TokenType.L_PAREN) 5966 if not wrapped and not optional: 5967 self.raise_error("Expecting (") 5968 parse_result = parse_method() 5969 if wrapped: 5970 self._match_r_paren() 5971 return parse_result 5972 5973 def _parse_expressions(self) -> t.List[exp.Expression]: 5974 return self._parse_csv(self._parse_expression) 5975 5976 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5977 return self._parse_select() or self._parse_set_operations( 5978 self._parse_expression() if alias else self._parse_assignment() 5979 ) 5980 5981 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5982 return self._parse_query_modifiers( 5983 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5984 ) 5985 5986 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5987 this = None 5988 if self._match_texts(self.TRANSACTION_KIND): 5989 this = self._prev.text 5990 5991 self._match_texts(("TRANSACTION", "WORK")) 5992 5993 modes = [] 5994 while True: 5995 mode = [] 5996 while self._match(TokenType.VAR): 5997 mode.append(self._prev.text) 5998 5999 if mode: 6000 modes.append(" ".join(mode)) 6001 if not self._match(TokenType.COMMA): 6002 break 6003 6004 return self.expression(exp.Transaction, this=this, modes=modes) 6005 6006 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6007 chain = None 6008 savepoint = None 6009 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6010 6011 self._match_texts(("TRANSACTION", "WORK")) 6012 6013 if self._match_text_seq("TO"): 6014 self._match_text_seq("SAVEPOINT") 6015 savepoint = self._parse_id_var() 6016 6017 if self._match(TokenType.AND): 6018 chain = not self._match_text_seq("NO") 6019 self._match_text_seq("CHAIN") 6020 6021 if is_rollback: 6022 return self.expression(exp.Rollback, savepoint=savepoint) 6023 6024 return self.expression(exp.Commit, chain=chain) 6025 6026 def _parse_refresh(self) -> exp.Refresh: 6027 self._match(TokenType.TABLE) 6028 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6029 6030 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6031 if not self._match_text_seq("ADD"): 6032 return None 6033 6034 self._match(TokenType.COLUMN) 6035 exists_column = self._parse_exists(not_=True) 6036 expression = self._parse_field_def() 6037 6038 if expression: 6039 expression.set("exists", exists_column) 6040 6041 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6042 if self._match_texts(("FIRST", "AFTER")): 6043 position = self._prev.text 6044 column_position = self.expression( 6045 exp.ColumnPosition, this=self._parse_column(), position=position 6046 ) 6047 expression.set("position", column_position) 6048 6049 return expression 6050 6051 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6052 drop = self._match(TokenType.DROP) and self._parse_drop() 6053 if drop and not isinstance(drop, exp.Command): 6054 drop.set("kind", drop.args.get("kind", "COLUMN")) 6055 return drop 6056 6057 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6058 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6059 return self.expression( 6060 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6061 ) 6062 6063 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6064 index = self._index - 1 6065 6066 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6067 return self._parse_csv( 6068 lambda: self.expression( 6069 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6070 ) 6071 ) 6072 6073 self._retreat(index) 6074 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6075 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6076 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6077 6078 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6079 if self._match_texts(self.ALTER_ALTER_PARSERS): 6080 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6081 6082 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6083 # keyword after ALTER we default to parsing this statement 6084 self._match(TokenType.COLUMN) 6085 column = self._parse_field(any_token=True) 6086 6087 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6088 return self.expression(exp.AlterColumn, this=column, drop=True) 6089 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6090 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6091 if self._match(TokenType.COMMENT): 6092 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6093 if self._match_text_seq("DROP", "NOT", "NULL"): 6094 return self.expression( 6095 exp.AlterColumn, 6096 this=column, 6097 drop=True, 6098 allow_null=True, 6099 ) 6100 if self._match_text_seq("SET", "NOT", "NULL"): 6101 return self.expression( 6102 exp.AlterColumn, 6103 this=column, 6104 allow_null=False, 6105 ) 6106 self._match_text_seq("SET", "DATA") 6107 self._match_text_seq("TYPE") 6108 return self.expression( 6109 exp.AlterColumn, 6110 this=column, 6111 dtype=self._parse_types(), 6112 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6113 using=self._match(TokenType.USING) and self._parse_assignment(), 6114 ) 6115 6116 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6117 if self._match_texts(("ALL", "EVEN", "AUTO")): 6118 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6119 6120 self._match_text_seq("KEY", "DISTKEY") 6121 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6122 6123 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6124 if compound: 6125 self._match_text_seq("SORTKEY") 6126 6127 if self._match(TokenType.L_PAREN, advance=False): 6128 return self.expression( 6129 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6130 ) 6131 6132 self._match_texts(("AUTO", "NONE")) 6133 return self.expression( 6134 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6135 ) 6136 6137 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6138 index = self._index - 1 6139 6140 partition_exists = self._parse_exists() 6141 if self._match(TokenType.PARTITION, advance=False): 6142 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6143 6144 self._retreat(index) 6145 return self._parse_csv(self._parse_drop_column) 6146 6147 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6148 if self._match(TokenType.COLUMN): 6149 exists = self._parse_exists() 6150 old_column = self._parse_column() 6151 to = self._match_text_seq("TO") 6152 new_column = self._parse_column() 6153 6154 if old_column is None or to is None or new_column is None: 6155 return None 6156 6157 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6158 6159 self._match_text_seq("TO") 6160 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6161 6162 def _parse_alter_table_set(self) -> exp.AlterSet: 6163 alter_set = self.expression(exp.AlterSet) 6164 6165 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6166 "TABLE", "PROPERTIES" 6167 ): 6168 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6169 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6170 alter_set.set("expressions", [self._parse_assignment()]) 6171 elif self._match_texts(("LOGGED", "UNLOGGED")): 6172 alter_set.set("option", exp.var(self._prev.text.upper())) 6173 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6174 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6175 elif self._match_text_seq("LOCATION"): 6176 alter_set.set("location", self._parse_field()) 6177 elif self._match_text_seq("ACCESS", "METHOD"): 6178 alter_set.set("access_method", self._parse_field()) 6179 elif self._match_text_seq("TABLESPACE"): 6180 alter_set.set("tablespace", self._parse_field()) 6181 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6182 alter_set.set("file_format", [self._parse_field()]) 6183 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6184 alter_set.set("file_format", self._parse_wrapped_options()) 6185 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6186 alter_set.set("copy_options", self._parse_wrapped_options()) 6187 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6188 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6189 else: 6190 if self._match_text_seq("SERDE"): 6191 alter_set.set("serde", self._parse_field()) 6192 6193 alter_set.set("expressions", [self._parse_properties()]) 6194 6195 return alter_set 6196 6197 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6198 start = self._prev 6199 6200 if not self._match(TokenType.TABLE): 6201 return 
self._parse_as_command(start) 6202 6203 exists = self._parse_exists() 6204 only = self._match_text_seq("ONLY") 6205 this = self._parse_table(schema=True) 6206 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6207 6208 if self._next: 6209 self._advance() 6210 6211 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6212 if parser: 6213 actions = ensure_list(parser(self)) 6214 options = self._parse_csv(self._parse_property) 6215 6216 if not self._curr and actions: 6217 return self.expression( 6218 exp.AlterTable, 6219 this=this, 6220 exists=exists, 6221 actions=actions, 6222 only=only, 6223 options=options, 6224 cluster=cluster, 6225 ) 6226 6227 return self._parse_as_command(start) 6228 6229 def _parse_merge(self) -> exp.Merge: 6230 self._match(TokenType.INTO) 6231 target = self._parse_table() 6232 6233 if target and self._match(TokenType.ALIAS, advance=False): 6234 target.set("alias", self._parse_table_alias()) 6235 6236 self._match(TokenType.USING) 6237 using = self._parse_table() 6238 6239 self._match(TokenType.ON) 6240 on = self._parse_assignment() 6241 6242 return self.expression( 6243 exp.Merge, 6244 this=target, 6245 using=using, 6246 on=on, 6247 expressions=self._parse_when_matched(), 6248 ) 6249 6250 def _parse_when_matched(self) -> t.List[exp.When]: 6251 whens = [] 6252 6253 while self._match(TokenType.WHEN): 6254 matched = not self._match(TokenType.NOT) 6255 self._match_text_seq("MATCHED") 6256 source = ( 6257 False 6258 if self._match_text_seq("BY", "TARGET") 6259 else self._match_text_seq("BY", "SOURCE") 6260 ) 6261 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6262 6263 self._match(TokenType.THEN) 6264 6265 if self._match(TokenType.INSERT): 6266 _this = self._parse_star() 6267 if _this: 6268 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6269 else: 6270 then = self.expression( 6271 exp.Insert, 6272 this=self._parse_value(), 6273 expression=self._match_text_seq("VALUES") and self._parse_value(), 6274 ) 6275 elif self._match(TokenType.UPDATE): 6276 expressions = self._parse_star() 6277 if expressions: 6278 then = self.expression(exp.Update, expressions=expressions) 6279 else: 6280 then = self.expression( 6281 exp.Update, 6282 expressions=self._match(TokenType.SET) 6283 and self._parse_csv(self._parse_equality), 6284 ) 6285 elif self._match(TokenType.DELETE): 6286 then = self.expression(exp.Var, this=self._prev.text) 6287 else: 6288 then = None 6289 6290 whens.append( 6291 self.expression( 6292 exp.When, 6293 matched=matched, 6294 source=source, 6295 condition=condition, 6296 then=then, 6297 ) 6298 ) 6299 return whens 6300 6301 def _parse_show(self) -> t.Optional[exp.Expression]: 6302 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6303 if parser: 6304 return parser(self) 6305 return self._parse_as_command(self._prev) 6306 6307 def _parse_set_item_assignment( 6308 self, kind: t.Optional[str] = None 6309 ) -> t.Optional[exp.Expression]: 6310 index = self._index 6311 6312 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6313 return self._parse_set_transaction(global_=kind == "GLOBAL") 6314 6315 left = self._parse_primary() or self._parse_column() 6316 assignment_delimiter = self._match_texts(("=", "TO")) 6317 6318 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6319 self._retreat(index) 6320 return None 6321 6322 right = self._parse_statement() or self._parse_id_var() 6323 if isinstance(right, 
(exp.Column, exp.Identifier)): 6324 right = exp.var(right.name) 6325 6326 this = self.expression(exp.EQ, this=left, expression=right) 6327 return self.expression(exp.SetItem, this=this, kind=kind) 6328 6329 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6330 self._match_text_seq("TRANSACTION") 6331 characteristics = self._parse_csv( 6332 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6333 ) 6334 return self.expression( 6335 exp.SetItem, 6336 expressions=characteristics, 6337 kind="TRANSACTION", 6338 **{"global": global_}, # type: ignore 6339 ) 6340 6341 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6342 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6343 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6344 6345 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6346 index = self._index 6347 set_ = self.expression( 6348 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6349 ) 6350 6351 if self._curr: 6352 self._retreat(index) 6353 return self._parse_as_command(self._prev) 6354 6355 return set_ 6356 6357 def _parse_var_from_options( 6358 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6359 ) -> t.Optional[exp.Var]: 6360 start = self._curr 6361 if not start: 6362 return None 6363 6364 option = start.text.upper() 6365 continuations = options.get(option) 6366 6367 index = self._index 6368 self._advance() 6369 for keywords in continuations or []: 6370 if isinstance(keywords, str): 6371 keywords = (keywords,) 6372 6373 if self._match_text_seq(*keywords): 6374 option = f"{option} {' '.join(keywords)}" 6375 break 6376 else: 6377 if continuations or continuations is None: 6378 if raise_unmatched: 6379 self.raise_error(f"Unknown option {option}") 6380 6381 self._retreat(index) 6382 return None 6383 6384 return exp.var(option) 6385 6386 def _parse_as_command(self, start: Token) -> exp.Command: 6387 while self._curr: 6388 self._advance() 6389 text = self._find_sql(start, self._prev) 6390 size = len(start.text) 6391 self._warn_unsupported() 6392 return exp.Command(this=text[:size], expression=text[size:]) 6393 6394 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6395 settings = [] 6396 6397 self._match_l_paren() 6398 kind = self._parse_id_var() 6399 6400 if self._match(TokenType.L_PAREN): 6401 while True: 6402 key = self._parse_id_var() 6403 value = self._parse_primary() 6404 6405 if not key and value is None: 6406 break 6407 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6408 self._match(TokenType.R_PAREN) 6409 6410 self._match_r_paren() 6411 6412 return self.expression( 6413 exp.DictProperty, 6414 this=this, 6415 kind=kind.this if kind else None, 6416 settings=settings, 6417 ) 6418 6419 def _parse_dict_range(self, this: str) -> exp.DictRange: 6420 self._match_l_paren() 6421 has_min = self._match_text_seq("MIN") 6422 if has_min: 6423 min = self._parse_var() or self._parse_primary() 6424 self._match_text_seq("MAX") 6425 max = self._parse_var() or self._parse_primary() 6426 else: 6427 max = self._parse_var() or self._parse_primary() 6428 min = exp.Literal.number(0) 6429 self._match_r_paren() 6430 return self.expression(exp.DictRange, this=this, min=min, max=max) 6431 6432 def _parse_comprehension( 6433 self, this: t.Optional[exp.Expression] 6434 ) -> t.Optional[exp.Comprehension]: 6435 index = self._index 6436 expression = self._parse_column() 6437 if not 
self._match(TokenType.IN): 6438 self._retreat(index - 1) 6439 return None 6440 iterator = self._parse_column() 6441 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6442 return self.expression( 6443 exp.Comprehension, 6444 this=this, 6445 expression=expression, 6446 iterator=iterator, 6447 condition=condition, 6448 ) 6449 6450 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6451 if self._match(TokenType.HEREDOC_STRING): 6452 return self.expression(exp.Heredoc, this=self._prev.text) 6453 6454 if not self._match_text_seq("$"): 6455 return None 6456 6457 tags = ["$"] 6458 tag_text = None 6459 6460 if self._is_connected(): 6461 self._advance() 6462 tags.append(self._prev.text.upper()) 6463 else: 6464 self.raise_error("No closing $ found") 6465 6466 if tags[-1] != "$": 6467 if self._is_connected() and self._match_text_seq("$"): 6468 tag_text = tags[-1] 6469 tags.append("$") 6470 else: 6471 self.raise_error("No closing $ found") 6472 6473 heredoc_start = self._curr 6474 6475 while self._curr: 6476 if self._match_text_seq(*tags, advance=False): 6477 this = self._find_sql(heredoc_start, self._prev) 6478 self._advance(len(tags)) 6479 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6480 6481 self._advance() 6482 6483 self.raise_error(f"No closing {''.join(tags)} found") 6484 return None 6485 6486 def _find_parser( 6487 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6488 ) -> t.Optional[t.Callable]: 6489 if not self._curr: 6490 return None 6491 6492 index = self._index 6493 this = [] 6494 while True: 6495 # The current token might be multiple words 6496 curr = self._curr.text.upper() 6497 key = curr.split(" ") 6498 this.append(curr) 6499 6500 self._advance() 6501 result, trie = in_trie(trie, key) 6502 if result == TrieResult.FAILED: 6503 break 6504 6505 if result == TrieResult.EXISTS: 6506 subparser = parsers[" ".join(this)] 6507 return subparser 6508 6509 self._retreat(index) 6510 return None 6511 6512 def _match(self, token_type, advance=True, expression=None): 6513 if not self._curr: 6514 return None 6515 6516 if self._curr.token_type == token_type: 6517 if advance: 6518 self._advance() 6519 self._add_comments(expression) 6520 return True 6521 6522 return None 6523 6524 def _match_set(self, types, advance=True): 6525 if not self._curr: 6526 return None 6527 6528 if self._curr.token_type in types: 6529 if advance: 6530 self._advance() 6531 return True 6532 6533 return None 6534 6535 def _match_pair(self, token_type_a, token_type_b, advance=True): 6536 if not self._curr or not self._next: 6537 return None 6538 6539 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6540 if advance: 6541 self._advance(2) 6542 return True 6543 6544 return None 6545 6546 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6547 if not self._match(TokenType.L_PAREN, expression=expression): 6548 self.raise_error("Expecting (") 6549 6550 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6551 if not self._match(TokenType.R_PAREN, expression=expression): 6552 self.raise_error("Expecting )") 6553 6554 def _match_texts(self, texts, advance=True): 6555 if self._curr and self._curr.text.upper() in texts: 6556 if advance: 6557 self._advance() 6558 return True 6559 return None 6560 6561 def _match_text_seq(self, *texts, advance=True): 6562 index = self._index 6563 for text in texts: 6564 if self._curr and self._curr.text.upper() == text: 6565 self._advance() 6566 else: 6567 
self._retreat(index) 6568 return None 6569 6570 if not advance: 6571 self._retreat(index) 6572 6573 return True 6574 6575 def _replace_lambda( 6576 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6577 ) -> t.Optional[exp.Expression]: 6578 if not node: 6579 return node 6580 6581 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6582 6583 for column in node.find_all(exp.Column): 6584 typ = lambda_types.get(column.parts[0].name) 6585 if typ is not None: 6586 dot_or_id = column.to_dot() if column.table else column.this 6587 6588 if typ: 6589 dot_or_id = self.expression( 6590 exp.Cast, 6591 this=dot_or_id, 6592 to=typ, 6593 ) 6594 6595 parent = column.parent 6596 6597 while isinstance(parent, exp.Dot): 6598 if not isinstance(parent.parent, exp.Dot): 6599 parent.replace(dot_or_id) 6600 break 6601 parent = parent.parent 6602 else: 6603 if column is node: 6604 node = dot_or_id 6605 else: 6606 column.replace(dot_or_id) 6607 return node 6608 6609 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6610 start = self._prev 6611 6612 # Not to be confused with TRUNCATE(number, decimals) function call 6613 if self._match(TokenType.L_PAREN): 6614 self._retreat(self._index - 2) 6615 return self._parse_function() 6616 6617 # Clickhouse supports TRUNCATE DATABASE as well 6618 is_database = self._match(TokenType.DATABASE) 6619 6620 self._match(TokenType.TABLE) 6621 6622 exists = self._parse_exists(not_=False) 6623 6624 expressions = self._parse_csv( 6625 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6626 ) 6627 6628 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6629 6630 if self._match_text_seq("RESTART", "IDENTITY"): 6631 identity = "RESTART" 6632 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6633 identity = "CONTINUE" 6634 else: 6635 identity = None 6636 6637 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6638 option = self._prev.text 6639 else: 6640 option = None 6641 6642 partition = self._parse_partition() 6643 6644 # Fallback case 6645 if self._curr: 6646 return self._parse_as_command(start) 6647 6648 return self.expression( 6649 exp.TruncateTable, 6650 expressions=expressions, 6651 is_database=is_database, 6652 exists=exists, 6653 cluster=cluster, 6654 identity=identity, 6655 option=option, 6656 partition=partition, 6657 ) 6658 6659 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6660 this = self._parse_ordered(self._parse_opclass) 6661 6662 if not self._match(TokenType.WITH): 6663 return this 6664 6665 op = self._parse_var(any_token=True) 6666 6667 return self.expression(exp.WithOperator, this=this, op=op) 6668 6669 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6670 self._match(TokenType.EQ) 6671 self._match(TokenType.L_PAREN) 6672 6673 opts: t.List[t.Optional[exp.Expression]] = [] 6674 while self._curr and not self._match(TokenType.R_PAREN): 6675 if self._match_text_seq("FORMAT_NAME", "="): 6676 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6677 # so we parse it separately to use _parse_field() 6678 prop = self.expression( 6679 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6680 ) 6681 opts.append(prop) 6682 else: 6683 opts.append(self._parse_property()) 6684 6685 self._match(TokenType.COMMA) 6686 6687 return opts 6688 6689 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6690 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6691 6692 options = [] 6693 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6694 option = self._parse_var(any_token=True) 6695 prev = self._prev.text.upper() 6696 6697 # Different dialects might separate options and values by white space, "=" and "AS" 6698 self._match(TokenType.EQ) 6699 self._match(TokenType.ALIAS) 6700 6701 param = self.expression(exp.CopyParameter, this=option) 6702 6703 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6704 TokenType.L_PAREN, advance=False 6705 ): 6706 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6707 param.set("expressions", self._parse_wrapped_options()) 6708 elif prev == "FILE_FORMAT": 6709 # T-SQL's external file format case 6710 param.set("expression", self._parse_field()) 6711 else: 6712 param.set("expression", self._parse_unquoted_field()) 6713 6714 options.append(param) 6715 self._match(sep) 6716 6717 return options 6718 6719 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6720 expr = self.expression(exp.Credentials) 6721 6722 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6723 expr.set("storage", self._parse_field()) 6724 if self._match_text_seq("CREDENTIALS"): 6725 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6726 creds = ( 6727 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6728 ) 6729 expr.set("credentials", creds) 6730 if self._match_text_seq("ENCRYPTION"): 6731 expr.set("encryption", self._parse_wrapped_options()) 6732 if self._match_text_seq("IAM_ROLE"): 6733 expr.set("iam_role", self._parse_field()) 6734 if self._match_text_seq("REGION"): 6735 expr.set("region", self._parse_field()) 6736 6737 return expr 6738 6739 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6740 return self._parse_field() 6741 6742 def _parse_copy(self) -> exp.Copy | exp.Command: 6743 start = self._prev 6744 6745 self._match(TokenType.INTO) 6746 6747 this = ( 6748 self._parse_select(nested=True, parse_subquery_alias=False) 6749 if self._match(TokenType.L_PAREN, advance=False) 6750 else self._parse_table(schema=True) 6751 ) 6752 6753 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6754 6755 files = self._parse_csv(self._parse_file_location) 6756 credentials = self._parse_credentials() 6757 6758 self._match_text_seq("WITH") 6759 6760 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6761 6762 # Fallback case 6763 if self._curr: 6764 return self._parse_as_command(start) 6765 6766 return self.expression( 6767 exp.Copy, 6768 this=this, 6769 kind=kind, 6770 credentials=credentials, 6771 files=files, 6772 params=params, 6773 )
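The private helpers above are most easily exercised through the public parse_one entry point. A minimal sketch of three of them follows; the assertions reflect a reading of the builders above and should be treated as illustrative rather than normative:

    import sqlglot
    from sqlglot import exp

    # _parse_decode: each (search, result) pair becomes a CASE branch; a
    # literal search compares with =, a NULL search with IS NULL, and a
    # trailing odd argument becomes the CASE default.
    case = sqlglot.parse_one("DECODE(a, 1, 'one', NULL, 'none', 'other')", read="oracle")
    assert isinstance(case, exp.Case)

    # _parse_trim: after FROM is matched the operands are swapped, so the
    # trimmed operand lands in `this` and the pattern in `expression`.
    trim = sqlglot.parse_one("TRIM(LEADING 'x' FROM y)")
    assert isinstance(trim, exp.Trim)
    assert trim.args.get("position") == "LEADING"

    # _parse_window: IGNORE NULLS written after the function call is
    # re-wrapped around the aggregate, per the Oracle/Snowflake notes above.
    win = sqlglot.parse_one(
        "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (PARTITION BY g ORDER BY o) FROM t"
    )
    assert win.find(exp.IgnoreNulls) is not None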
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1217 def __init__( 1218 self, 1219 error_level: t.Optional[ErrorLevel] = None, 1220 error_message_context: int = 100, 1221 max_errors: int = 3, 1222 dialect: DialectType = None, 1223 ): 1224 from sqlglot.dialects import Dialect 1225 1226 self.error_level = error_level or ErrorLevel.IMMEDIATE 1227 self.error_message_context = error_message_context 1228 self.max_errors = max_errors 1229 self.dialect = Dialect.get_or_raise(dialect) 1230 self.reset()
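As a usage sketch (not part of the module), the constructor can be called directly; most callers go through the higher-level sqlglot.parse helpers instead:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # error_level, error_message_context and max_errors default to
    # ErrorLevel.IMMEDIATE, 100 and 3; dialect is resolved through
    # Dialect.get_or_raise, so a dialect name string is accepted.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")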
1242 def parse( 1243 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1244 ) -> t.List[t.Optional[exp.Expression]]: 1245 """ 1246 Parses a list of tokens and returns a list of syntax trees, one tree 1247 per parsed SQL statement. 1248 1249 Args: 1250 raw_tokens: The list of tokens. 1251 sql: The original SQL string, used to produce helpful debug messages. 1252 1253 Returns: 1254 The list of the produced syntax trees. 1255 """ 1256 return self._parse( 1257 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1258 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
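For example (a hedged sketch; Tokenizer is the base tokenizer imported at the top of this module):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per statement; passing the original sql lets
    # raise_error slice readable context out of it on failure.
    trees = Parser().parse(tokens, sql)
    assert len(trees) == 2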
1260 def parse_into( 1261 self, 1262 expression_types: exp.IntoType, 1263 raw_tokens: t.List[Token], 1264 sql: t.Optional[str] = None, 1265 ) -> t.List[t.Optional[exp.Expression]]: 1266 """ 1267 Parses a list of tokens into a given Expression type. If a collection of Expression 1268 types is given instead, this method will try to parse the token list into each one 1269 of them, stopping at the first for which the parsing succeeds. 1270 1271 Args: 1272 expression_types: The expression type(s) to try and parse the token list into. 1273 raw_tokens: The list of tokens. 1274 sql: The original SQL string, used to produce helpful debug messages. 1275 1276 Returns: 1277 The target Expression. 1278 """ 1279 errors = [] 1280 for expression_type in ensure_list(expression_types): 1281 parser = self.EXPRESSION_PARSERS.get(expression_type) 1282 if not parser: 1283 raise TypeError(f"No parser registered for {expression_type}") 1284 1285 try: 1286 return self._parse(parser, raw_tokens, sql) 1287 except ParseError as e: 1288 e.errors[0]["into_expression"] = expression_type 1289 errors.append(e) 1290 1291 raise ParseError( 1292 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1293 errors=merge_errors(errors), 1294 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
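A sketch, assuming exp.Select is registered in EXPRESSION_PARSERS:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    # A type without a registered parser raises TypeError, and a failed
    # parse raises ParseError with into_expression recorded on each
    # underlying error, per the method body above.
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)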
1334 def check_errors(self) -> None: 1335 """Logs or raises any found errors, depending on the chosen error level setting.""" 1336 if self.error_level == ErrorLevel.WARN: 1337 for error in self.errors: 1338 logger.error(str(error)) 1339 elif self.error_level == ErrorLevel.RAISE and self.errors: 1340 raise ParseError( 1341 concat_messages(self.errors, self.max_errors), 1342 errors=merge_errors(self.errors), 1343 )
Logs or raises any found errors, depending on the chosen error level setting.
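With ErrorLevel.WARN, for instance, a malformed statement is logged instead of raised (a sketch; the partial tree returned is version-dependent):

    import logging
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig(level=logging.ERROR)

    sql = "SELECT * FROM"  # malformed: FROM without a table name
    parser = Parser(error_level=ErrorLevel.WARN)
    trees = parser.parse(Tokenizer().tokenize(sql), sql)
    # check_errors ran inside parse(): the error was logged rather than
    # raised, and it remains inspectable on parser.errors.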
1345 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1346 """ 1347 Appends an error in the list of recorded errors or raises it, depending on the chosen 1348 error level setting. 1349 """ 1350 token = token or self._curr or self._prev or Token.string("") 1351 start = token.start 1352 end = token.end + 1 1353 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1354 highlight = self.sql[start:end] 1355 end_context = self.sql[end : end + self.error_message_context] 1356 1357 error = ParseError.new( 1358 f"{message}. Line {token.line}, Col: {token.col}.\n" 1359 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1360 description=message, 1361 line=token.line, 1362 col=token.col, 1363 start_context=start_context, 1364 highlight=highlight, 1365 end_context=end_context, 1366 ) 1367 1368 if self.error_level == ErrorLevel.IMMEDIATE: 1369 raise error 1370 1371 self.errors.append(error)
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
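Under the default ErrorLevel.IMMEDIATE the first error raises at once; with ErrorLevel.RAISE, errors accumulate and surface from check_errors as a single ParseError carrying the structured fields built here (a sketch):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM"
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        first = e.errors[0]
        # Structured fields populated by raise_error:
        print(first["description"], first["line"], first["col"])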
1373 def expression( 1374 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1375 ) -> E: 1376 """ 1377 Creates a new, validated Expression. 1378 1379 Args: 1380 exp_class: The expression class to instantiate. 1381 comments: An optional list of comments to attach to the expression. 1382 kwargs: The arguments to set for the expression along with their respective values. 1383 1384 Returns: 1385 The target expression. 1386 """ 1387 instance = exp_class(**kwargs) 1388 instance.add_comments(comments) if comments else self._add_comments(instance) 1389 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
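For instance (a sketch built with the public helpers in sqlglot.exp):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # expression() instantiates the class, attaches any pending comments,
    # then runs validate_expression on the result.
    eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    assert eq.sql() == "a = 1"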
1396 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1397 """ 1398 Validates an Expression, making sure that all its mandatory arguments are set. 1399 1400 Args: 1401 expression: The expression to validate. 1402 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1403 1404 Returns: 1405 The validated expression. 1406 """ 1407 if self.error_level != ErrorLevel.IGNORE: 1408 for error_message in expression.error_messages(args): 1409 self.raise_error(error_message) 1410 1411 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
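A sketch of the failure path, assuming exp.Between declares low and high as mandatory arguments:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()  # ErrorLevel.IMMEDIATE by default
    try:
        # 'low' and 'high' are assumed mandatory for exp.Between; leaving
        # them unset makes error_messages() report them, and raise_error
        # then raises immediately under ErrorLevel.IMMEDIATE.
        parser.validate_expression(exp.Between(this=exp.column("x")))
    except ParseError as e:
        print(e.errors[0]["description"])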