sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
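
# Illustrative sketch of the builders above: when the default dialect parses a
# MOD call, build_mod wraps binary operands in parentheses so precedence is
# preserved when the tree is rendered back to SQL with the `%` operator.
#
#     import sqlglot
#     sqlglot.parse_one("MOD(a + 1, 7)").sql()  # '(a + 1) % 7'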


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
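
    # End-to-end flow (an illustrative sketch; the top-level sqlglot.parse
    # helper wraps these steps): tokenize with the dialect's Tokenizer, then
    # hand the token list to Parser.parse, which yields one tree per statement.
    #
    #     from sqlglot.tokens import Tokenizer
    #     from sqlglot.parser import Parser
    #
    #     tokens = Tokenizer().tokenize("SELECT a FROM b; SELECT c")
    #     trees = Parser().parse(tokens)  # one exp.Select per statement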

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
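
    # Note (illustrative): these sets are deliberately permissive, so many
    # keywords still work as plain identifiers; e.g. "SELECT cache FROM t"
    # parses with `cache` as a column name because CACHE is in ID_VAR_TOKENS.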

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
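
    # These token-to-expression tables drive the precedence-climbing descent
    # (roughly: assignment -> disjunction -> conjunction -> equality ->
    # comparison -> bitwise -> term -> factor -> exponent), so later tables
    # bind tighter. Sketch of the effect:
    #
    #     import sqlglot
    #     sqlglot.parse_one("a + b * c")  # roughly Add(a, Mul(b, c))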

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
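
    # Statement dispatch sketch: the first token picks the handler, e.g.
    # "UPDATE t SET x = 1" routes through TokenType.UPDATE to _parse_update and
    # yields exp.Update; tokens registered in the dialect tokenizer's COMMANDS
    # instead fall back to _parse_command, which wraps the raw text in
    # exp.Command.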

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
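
    # Range operators attach to an already-parsed left-hand side (sketch):
    #
    #     import sqlglot
    #     sqlglot.parse_one("x BETWEEN 1 AND 2")  # exp.Between(this=x, low=1, high=2)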

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
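
    # Sketch (assumes the MySQL dialect, which exercises this table): a
    # trailing ENGINE assignment in DDL is matched against PROPERTY_PARSERS and
    # becomes an exp.EngineProperty in the Create node's "properties" arg.
    #
    #     import sqlglot
    #     sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")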

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
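
    # Sketch: constraints trailing a column's type are looked up here, e.g.
    #
    #     import sqlglot
    #     sqlglot.parse_one("CREATE TABLE t (x INT PRIMARY KEY)")
    #     # column `x` carries a PrimaryKeyColumnConstraint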

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
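
    # These functions need bespoke parsing because their argument lists aren't
    # plain CSV, e.g. CAST's "AS <type>" clause:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT CAST(x AS INT)")  # projection is exp.Cast(this=x, to=INT)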

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }
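
    # Modifier sketch: after a query is parsed, tokens found in
    # QUERY_MODIFIER_PARSERS are consumed and stored under the keys above:
    #
    #     import sqlglot
    #     select = sqlglot.parse_one("SELECT a FROM t WHERE b LIMIT 1")
    #     select.args["where"], select.args["limit"]  # exp.Where, exp.Limit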
"NOSHARD", 1112 ), 1113 tuple(), 1114 ), 1115 } 1116 1117 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1118 1119 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1120 1121 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1122 1123 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1124 1125 CLONE_KEYWORDS = {"CLONE", "COPY"} 1126 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1127 1128 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1129 1130 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1131 1132 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1133 1134 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1135 1136 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1137 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1138 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1139 1140 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1141 1142 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1143 1144 ADD_CONSTRAINT_TOKENS = { 1145 TokenType.CONSTRAINT, 1146 TokenType.FOREIGN_KEY, 1147 TokenType.INDEX, 1148 TokenType.KEY, 1149 TokenType.PRIMARY_KEY, 1150 TokenType.UNIQUE, 1151 } 1152 1153 DISTINCT_TOKENS = {TokenType.DISTINCT} 1154 1155 NULL_TOKENS = {TokenType.NULL} 1156 1157 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1158 1159 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1160 1161 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1162 1163 STRICT_CAST = True 1164 1165 PREFIXED_PIVOT_COLUMNS = False 1166 IDENTIFY_PIVOT_STRINGS = False 1167 1168 LOG_DEFAULTS_TO_LN = False 1169 1170 # Whether ADD is present for each column added by ALTER TABLE 1171 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1172 1173 # Whether the table sample clause expects CSV syntax 1174 TABLESAMPLE_CSV = False 1175 1176 # The default method used for table sampling 1177 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1178 1179 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1180 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1181 1182 # Whether the TRIM function expects the characters to trim as its first argument 1183 TRIM_PATTERN_FIRST = False 1184 1185 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1186 STRING_ALIASES = False 1187 1188 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1189 MODIFIERS_ATTACHED_TO_SET_OP = True 1190 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1191 1192 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1193 NO_PAREN_IF_COMMANDS = True 1194 1195 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1196 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1197 1198 # Whether the `:` operator is used to extract a value from a JSON document 1199 COLON_IS_JSON_EXTRACT = False 1200 1201 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1202 # If this is True and '(' is not found, the keyword will be treated as an identifier 1203 VALUES_FOLLOWED_BY_PAREN = True 1204 1205 # Whether implicit unnesting is supported, e.g. 

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
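
    # Dialect parsers tune the switches above by subclassing (an illustrative
    # sketch, not a real dialect):
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
    #         STRING_ALIASES = True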

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
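
    # Usage sketch: parse_into tries each candidate type's entry in
    # EXPRESSION_PARSERS; the top-level sqlglot.parse_one(sql, into=...) helper
    # wraps this.
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "a AND b"
    #     Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)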
1402 """ 1403 instance = exp_class(**kwargs) 1404 instance.add_comments(comments) if comments else self._add_comments(instance) 1405 return self.validate_expression(instance) 1406 1407 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1408 if expression and self._prev_comments: 1409 expression.add_comments(self._prev_comments) 1410 self._prev_comments = None 1411 1412 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1413 """ 1414 Validates an Expression, making sure that all its mandatory arguments are set. 1415 1416 Args: 1417 expression: The expression to validate. 1418 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1419 1420 Returns: 1421 The validated expression. 1422 """ 1423 if self.error_level != ErrorLevel.IGNORE: 1424 for error_message in expression.error_messages(args): 1425 self.raise_error(error_message) 1426 1427 return expression 1428 1429 def _find_sql(self, start: Token, end: Token) -> str: 1430 return self.sql[start.start : end.end + 1] 1431 1432 def _is_connected(self) -> bool: 1433 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1434 1435 def _advance(self, times: int = 1) -> None: 1436 self._index += times 1437 self._curr = seq_get(self._tokens, self._index) 1438 self._next = seq_get(self._tokens, self._index + 1) 1439 1440 if self._index > 0: 1441 self._prev = self._tokens[self._index - 1] 1442 self._prev_comments = self._prev.comments 1443 else: 1444 self._prev = None 1445 self._prev_comments = None 1446 1447 def _retreat(self, index: int) -> None: 1448 if index != self._index: 1449 self._advance(index - self._index) 1450 1451 def _warn_unsupported(self) -> None: 1452 if len(self._tokens) <= 1: 1453 return 1454 1455 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1456 # interested in emitting a warning for the one being currently processed. 1457 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1458 1459 logger.warning( 1460 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1461 ) 1462 1463 def _parse_command(self) -> exp.Command: 1464 self._warn_unsupported() 1465 return self.expression( 1466 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1467 ) 1468 1469 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1470 """ 1471 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
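
    # Sketch: _parse_drop turns "DROP TABLE IF EXISTS t CASCADE" into
    # exp.Drop(kind='TABLE', exists=True, cascade=True), with _parse_exists
    # consuming the optional IF [NOT] EXISTS sequence.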
1622 def _parse_create(self) -> exp.Create | exp.Command: 1623 # Note: this can't be None because we've matched a statement parser 1624 start = self._prev 1625 comments = self._prev_comments 1626 1627 replace = ( 1628 start.token_type == TokenType.REPLACE 1629 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1630 or self._match_pair(TokenType.OR, TokenType.ALTER) 1631 ) 1632 1633 unique = self._match(TokenType.UNIQUE) 1634 1635 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1636 self._advance() 1637 1638 properties = None 1639 create_token = self._match_set(self.CREATABLES) and self._prev 1640 1641 if not create_token: 1642 # exp.Properties.Location.POST_CREATE 1643 properties = self._parse_properties() 1644 create_token = self._match_set(self.CREATABLES) and self._prev 1645 1646 if not properties or not create_token: 1647 return self._parse_as_command(start) 1648 1649 exists = self._parse_exists(not_=True) 1650 this = None 1651 expression: t.Optional[exp.Expression] = None 1652 indexes = None 1653 no_schema_binding = None 1654 begin = None 1655 end = None 1656 clone = None 1657 1658 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1659 nonlocal properties 1660 if properties and temp_props: 1661 properties.expressions.extend(temp_props.expressions) 1662 elif temp_props: 1663 properties = temp_props 1664 1665 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1666 this = self._parse_user_defined_function(kind=create_token.token_type) 1667 1668 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1669 extend_props(self._parse_properties()) 1670 1671 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1672 extend_props(self._parse_properties()) 1673 1674 if not expression: 1675 if self._match(TokenType.COMMAND): 1676 expression = self._parse_as_command(self._prev) 1677 else: 1678 begin = self._match(TokenType.BEGIN) 1679 return_ = self._match_text_seq("RETURN") 1680 1681 if self._match(TokenType.STRING, advance=False): 1682 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1683 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1684 expression = self._parse_string() 1685 extend_props(self._parse_properties()) 1686 else: 1687 expression = self._parse_statement() 1688 1689 end = self._match_text_seq("END") 1690 1691 if return_: 1692 expression = self.expression(exp.Return, this=expression) 1693 elif create_token.token_type == TokenType.INDEX: 1694 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1695 if not self._match(TokenType.ON): 1696 index = self._parse_id_var() 1697 anonymous = False 1698 else: 1699 index = None 1700 anonymous = True 1701 1702 this = self._parse_index(index=index, anonymous=anonymous) 1703 elif create_token.token_type in self.DB_CREATABLES: 1704 table_parts = self._parse_table_parts( 1705 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1706 ) 1707 1708 # exp.Properties.Location.POST_NAME 1709 self._match(TokenType.COMMA) 1710 extend_props(self._parse_properties(before=True)) 1711 1712 this = self._parse_schema(this=table_parts) 1713 1714 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1715 extend_props(self._parse_properties()) 1716 1717 self._match(TokenType.ALIAS) 1718 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1719 # exp.Properties.Location.POST_ALIAS 1720 extend_props(self._parse_properties()) 1721 1722 if create_token.token_type == TokenType.SEQUENCE: 1723 expression = self._parse_types() 1724 extend_props(self._parse_properties()) 1725 else: 1726 expression = self._parse_ddl_select() 1727 1728 if create_token.token_type == TokenType.TABLE: 1729 # exp.Properties.Location.POST_EXPRESSION 1730 extend_props(self._parse_properties()) 1731 1732 indexes = [] 1733 while True: 1734 index = self._parse_index() 1735 1736 # exp.Properties.Location.POST_INDEX 1737 extend_props(self._parse_properties()) 1738 1739 if not index: 1740 break 1741 else: 1742 self._match(TokenType.COMMA) 1743 indexes.append(index) 1744 elif create_token.token_type == TokenType.VIEW: 1745 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1746 no_schema_binding = True 1747 1748 shallow = self._match_text_seq("SHALLOW") 1749 1750 if self._match_texts(self.CLONE_KEYWORDS): 1751 copy = self._prev.text.lower() == "copy" 1752 clone = self.expression( 1753 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1754 ) 1755 1756 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1757 return self._parse_as_command(start) 1758 1759 return self.expression( 1760 exp.Create, 1761 comments=comments, 1762 this=this, 1763 kind=create_token.text.upper(), 1764 replace=replace, 1765 unique=unique, 1766 expression=expression, 1767 exists=exists, 1768 properties=properties, 1769 indexes=indexes, 1770 no_schema_binding=no_schema_binding, 1771 begin=begin, 1772 end=end, 1773 clone=clone, 1774 ) 1775 1776 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1777 seq = exp.SequenceProperties() 1778 1779 options = [] 1780 index = self._index 1781 1782 while self._curr: 1783 self._match(TokenType.COMMA) 1784 if self._match_text_seq("INCREMENT"): 1785 self._match_text_seq("BY") 1786 self._match_text_seq("=") 1787 seq.set("increment", self._parse_term()) 1788 elif self._match_text_seq("MINVALUE"): 1789 seq.set("minvalue", self._parse_term()) 1790 elif self._match_text_seq("MAXVALUE"): 1791 seq.set("maxvalue", self._parse_term()) 1792 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1793 self._match_text_seq("=") 1794 seq.set("start", self._parse_term()) 1795 elif self._match_text_seq("CACHE"): 1796 # T-SQL allows empty CACHE which is initialized dynamically 1797 seq.set("cache", self._parse_number() or True) 1798 elif self._match_text_seq("OWNED", "BY"): 1799 # "OWNED BY NONE" is the default 1800 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1801 else: 1802 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1803 if opt: 1804 options.append(opt) 1805 else: 1806 break 1807 1808 seq.set("options", options if options else None) 1809 return None if self._index == index else seq 1810 1811 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1812 # only used for teradata currently 1813 self._match(TokenType.COMMA) 1814 1815 kwargs = { 1816 "no": self._match_text_seq("NO"), 1817 "dual": self._match_text_seq("DUAL"), 1818 "before": self._match_text_seq("BEFORE"), 1819 "default": self._match_text_seq("DEFAULT"), 1820 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1821 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1822 "after": self._match_text_seq("AFTER"), 1823 "minimum": self._match_texts(("MIN", "MINIMUM")), 1824 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1825 } 1826 1827 if self._match_texts(self.PROPERTY_PARSERS): 1828 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1829 try: 1830 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1831 except TypeError: 1832 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1833 1834 return None 1835 1836 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1837 return self._parse_wrapped_csv(self._parse_property) 1838 1839 def _parse_property(self) -> t.Optional[exp.Expression]: 1840 if self._match_texts(self.PROPERTY_PARSERS): 1841 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1842 1843 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1844 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1845 1846 if self._match_text_seq("COMPOUND", "SORTKEY"): 1847 return self._parse_sortkey(compound=True) 1848 1849 if self._match_text_seq("SQL", "SECURITY"): 1850 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1851 1852 index = self._index 1853 key = self._parse_column() 1854 1855 if not self._match(TokenType.EQ): 1856 self._retreat(index) 1857 return self._parse_sequence_properties() 1858 1859 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1860 if isinstance(key, exp.Column): 1861 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1862 1863 value = self._parse_bitwise() or self._parse_var(any_token=True) 1864 1865 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1866 if isinstance(value, exp.Column): 1867 value = exp.var(value.name) 1868 1869 return self.expression(exp.Property, this=key, value=value) 1870 1871 def _parse_stored(self) -> exp.FileFormatProperty: 1872 self._match(TokenType.ALIAS) 1873 1874 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1875 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1876 1877 return self.expression( 1878 exp.FileFormatProperty, 1879 this=( 1880 self.expression( 1881 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1882 ) 1883 if 
input_format or output_format 1884 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1885 ), 1886 ) 1887 1888 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1889 field = self._parse_field() 1890 if isinstance(field, exp.Identifier) and not field.quoted: 1891 field = exp.var(field) 1892 1893 return field 1894 1895 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1896 self._match(TokenType.EQ) 1897 self._match(TokenType.ALIAS) 1898 1899 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1900 1901 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1902 properties = [] 1903 while True: 1904 if before: 1905 prop = self._parse_property_before() 1906 else: 1907 prop = self._parse_property() 1908 if not prop: 1909 break 1910 for p in ensure_list(prop): 1911 properties.append(p) 1912 1913 if properties: 1914 return self.expression(exp.Properties, expressions=properties) 1915 1916 return None 1917 1918 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1919 return self.expression( 1920 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1921 ) 1922 1923 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1924 if self._index >= 2: 1925 pre_volatile_token = self._tokens[self._index - 2] 1926 else: 1927 pre_volatile_token = None 1928 1929 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1930 return exp.VolatileProperty() 1931 1932 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1933 1934 def _parse_retention_period(self) -> exp.Var: 1935 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1936 number = self._parse_number() 1937 number_str = f"{number} " if number else "" 1938 unit = self._parse_var(any_token=True) 1939 return exp.var(f"{number_str}{unit}") 1940 1941 def _parse_system_versioning_property( 1942 self, with_: bool = False 1943 ) -> exp.WithSystemVersioningProperty: 1944 self._match(TokenType.EQ) 1945 prop = self.expression( 1946 exp.WithSystemVersioningProperty, 1947 **{ # type: ignore 1948 "on": True, 1949 "with": with_, 1950 }, 1951 ) 1952 1953 if self._match_text_seq("OFF"): 1954 prop.set("on", False) 1955 return prop 1956 1957 self._match(TokenType.ON) 1958 if self._match(TokenType.L_PAREN): 1959 while self._curr and not self._match(TokenType.R_PAREN): 1960 if self._match_text_seq("HISTORY_TABLE", "="): 1961 prop.set("this", self._parse_table_parts()) 1962 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1963 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1964 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1965 prop.set("retention_period", self._parse_retention_period()) 1966 1967 self._match(TokenType.COMMA) 1968 1969 return prop 1970 1971 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1972 self._match(TokenType.EQ) 1973 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1974 prop = self.expression(exp.DataDeletionProperty, on=on) 1975 1976 if self._match(TokenType.L_PAREN): 1977 while self._curr and not self._match(TokenType.R_PAREN): 1978 if self._match_text_seq("FILTER_COLUMN", "="): 1979 prop.set("filter_column", self._parse_column()) 1980 elif self._match_text_seq("RETENTION_PERIOD", "="): 1981 prop.set("retention_period", self._parse_retention_period()) 1982 1983 
self._match(TokenType.COMMA) 1984 1985 return prop 1986 1987 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1988 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1989 prop = self._parse_system_versioning_property(with_=True) 1990 self._match_r_paren() 1991 return prop 1992 1993 if self._match(TokenType.L_PAREN, advance=False): 1994 return self._parse_wrapped_properties() 1995 1996 if self._match_text_seq("JOURNAL"): 1997 return self._parse_withjournaltable() 1998 1999 if self._match_texts(self.VIEW_ATTRIBUTES): 2000 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2001 2002 if self._match_text_seq("DATA"): 2003 return self._parse_withdata(no=False) 2004 elif self._match_text_seq("NO", "DATA"): 2005 return self._parse_withdata(no=True) 2006 2007 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2008 return self._parse_serde_properties(with_=True) 2009 2010 if not self._next: 2011 return None 2012 2013 return self._parse_withisolatedloading() 2014 2015 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2016 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2017 self._match(TokenType.EQ) 2018 2019 user = self._parse_id_var() 2020 self._match(TokenType.PARAMETER) 2021 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2022 2023 if not user or not host: 2024 return None 2025 2026 return exp.DefinerProperty(this=f"{user}@{host}") 2027 2028 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2029 self._match(TokenType.TABLE) 2030 self._match(TokenType.EQ) 2031 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2032 2033 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2034 return self.expression(exp.LogProperty, no=no) 2035 2036 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2037 return self.expression(exp.JournalProperty, **kwargs) 2038 2039 def _parse_checksum(self) -> exp.ChecksumProperty: 2040 self._match(TokenType.EQ) 2041 2042 on = None 2043 if self._match(TokenType.ON): 2044 on = True 2045 elif self._match_text_seq("OFF"): 2046 on = False 2047 2048 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2049 2050 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2051 return self.expression( 2052 exp.Cluster, 2053 expressions=( 2054 self._parse_wrapped_csv(self._parse_ordered) 2055 if wrapped 2056 else self._parse_csv(self._parse_ordered) 2057 ), 2058 ) 2059 2060 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2061 self._match_text_seq("BY") 2062 2063 self._match_l_paren() 2064 expressions = self._parse_csv(self._parse_column) 2065 self._match_r_paren() 2066 2067 if self._match_text_seq("SORTED", "BY"): 2068 self._match_l_paren() 2069 sorted_by = self._parse_csv(self._parse_ordered) 2070 self._match_r_paren() 2071 else: 2072 sorted_by = None 2073 2074 self._match(TokenType.INTO) 2075 buckets = self._parse_number() 2076 self._match_text_seq("BUCKETS") 2077 2078 return self.expression( 2079 exp.ClusteredByProperty, 2080 expressions=expressions, 2081 sorted_by=sorted_by, 2082 buckets=buckets, 2083 ) 2084 2085 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2086 if not self._match_text_seq("GRANTS"): 2087 self._retreat(self._index - 1) 2088 return None 2089 2090 return self.expression(exp.CopyGrantsProperty) 2091 2092 def _parse_freespace(self) -> exp.FreespaceProperty: 2093 self._match(TokenType.EQ) 2094 return 
self.expression( 2095 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2096 ) 2097 2098 def _parse_mergeblockratio( 2099 self, no: bool = False, default: bool = False 2100 ) -> exp.MergeBlockRatioProperty: 2101 if self._match(TokenType.EQ): 2102 return self.expression( 2103 exp.MergeBlockRatioProperty, 2104 this=self._parse_number(), 2105 percent=self._match(TokenType.PERCENT), 2106 ) 2107 2108 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2109 2110 def _parse_datablocksize( 2111 self, 2112 default: t.Optional[bool] = None, 2113 minimum: t.Optional[bool] = None, 2114 maximum: t.Optional[bool] = None, 2115 ) -> exp.DataBlocksizeProperty: 2116 self._match(TokenType.EQ) 2117 size = self._parse_number() 2118 2119 units = None 2120 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2121 units = self._prev.text 2122 2123 return self.expression( 2124 exp.DataBlocksizeProperty, 2125 size=size, 2126 units=units, 2127 default=default, 2128 minimum=minimum, 2129 maximum=maximum, 2130 ) 2131 2132 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2133 self._match(TokenType.EQ) 2134 always = self._match_text_seq("ALWAYS") 2135 manual = self._match_text_seq("MANUAL") 2136 never = self._match_text_seq("NEVER") 2137 default = self._match_text_seq("DEFAULT") 2138 2139 autotemp = None 2140 if self._match_text_seq("AUTOTEMP"): 2141 autotemp = self._parse_schema() 2142 2143 return self.expression( 2144 exp.BlockCompressionProperty, 2145 always=always, 2146 manual=manual, 2147 never=never, 2148 default=default, 2149 autotemp=autotemp, 2150 ) 2151 2152 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2153 index = self._index 2154 no = self._match_text_seq("NO") 2155 concurrent = self._match_text_seq("CONCURRENT") 2156 2157 if not self._match_text_seq("ISOLATED", "LOADING"): 2158 self._retreat(index) 2159 return None 2160 2161 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2162 return self.expression( 2163 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2164 ) 2165 2166 def _parse_locking(self) -> exp.LockingProperty: 2167 if self._match(TokenType.TABLE): 2168 kind = "TABLE" 2169 elif self._match(TokenType.VIEW): 2170 kind = "VIEW" 2171 elif self._match(TokenType.ROW): 2172 kind = "ROW" 2173 elif self._match_text_seq("DATABASE"): 2174 kind = "DATABASE" 2175 else: 2176 kind = None 2177 2178 if kind in ("DATABASE", "TABLE", "VIEW"): 2179 this = self._parse_table_parts() 2180 else: 2181 this = None 2182 2183 if self._match(TokenType.FOR): 2184 for_or_in = "FOR" 2185 elif self._match(TokenType.IN): 2186 for_or_in = "IN" 2187 else: 2188 for_or_in = None 2189 2190 if self._match_text_seq("ACCESS"): 2191 lock_type = "ACCESS" 2192 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2193 lock_type = "EXCLUSIVE" 2194 elif self._match_text_seq("SHARE"): 2195 lock_type = "SHARE" 2196 elif self._match_text_seq("READ"): 2197 lock_type = "READ" 2198 elif self._match_text_seq("WRITE"): 2199 lock_type = "WRITE" 2200 elif self._match_text_seq("CHECKSUM"): 2201 lock_type = "CHECKSUM" 2202 else: 2203 lock_type = None 2204 2205 override = self._match_text_seq("OVERRIDE") 2206 2207 return self.expression( 2208 exp.LockingProperty, 2209 this=this, 2210 kind=kind, 2211 for_or_in=for_or_in, 2212 lock_type=lock_type, 2213 override=override, 2214 ) 2215 2216 def _parse_partition_by(self) -> t.List[exp.Expression]: 2217 if 
self._match(TokenType.PARTITION_BY): 2218 return self._parse_csv(self._parse_assignment) 2219 return [] 2220 2221 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2222 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2223 if self._match_text_seq("MINVALUE"): 2224 return exp.var("MINVALUE") 2225 if self._match_text_seq("MAXVALUE"): 2226 return exp.var("MAXVALUE") 2227 return self._parse_bitwise() 2228 2229 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2230 expression = None 2231 from_expressions = None 2232 to_expressions = None 2233 2234 if self._match(TokenType.IN): 2235 this = self._parse_wrapped_csv(self._parse_bitwise) 2236 elif self._match(TokenType.FROM): 2237 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2238 self._match_text_seq("TO") 2239 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2240 elif self._match_text_seq("WITH", "(", "MODULUS"): 2241 this = self._parse_number() 2242 self._match_text_seq(",", "REMAINDER") 2243 expression = self._parse_number() 2244 self._match_r_paren() 2245 else: 2246 self.raise_error("Failed to parse partition bound spec.") 2247 2248 return self.expression( 2249 exp.PartitionBoundSpec, 2250 this=this, 2251 expression=expression, 2252 from_expressions=from_expressions, 2253 to_expressions=to_expressions, 2254 ) 2255 2256 # https://www.postgresql.org/docs/current/sql-createtable.html 2257 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2258 if not self._match_text_seq("OF"): 2259 self._retreat(self._index - 1) 2260 return None 2261 2262 this = self._parse_table(schema=True) 2263 2264 if self._match(TokenType.DEFAULT): 2265 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2266 elif self._match_text_seq("FOR", "VALUES"): 2267 expression = self._parse_partition_bound_spec() 2268 else: 2269 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2270 2271 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2272 2273 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2274 self._match(TokenType.EQ) 2275 return self.expression( 2276 exp.PartitionedByProperty, 2277 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2278 ) 2279 2280 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2281 if self._match_text_seq("AND", "STATISTICS"): 2282 statistics = True 2283 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2284 statistics = False 2285 else: 2286 statistics = None 2287 2288 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2289 2290 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2291 if self._match_text_seq("SQL"): 2292 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2293 return None 2294 2295 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2296 if self._match_text_seq("SQL", "DATA"): 2297 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2298 return None 2299 2300 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2301 if self._match_text_seq("PRIMARY", "INDEX"): 2302 return exp.NoPrimaryIndexProperty() 2303 if self._match_text_seq("SQL"): 2304 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2305 return None 2306 2307 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2308 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2309 return exp.OnCommitProperty() 2310 
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2311 return exp.OnCommitProperty(delete=True) 2312 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2313 2314 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2315 if self._match_text_seq("SQL", "DATA"): 2316 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2317 return None 2318 2319 def _parse_distkey(self) -> exp.DistKeyProperty: 2320 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2321 2322 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2323 table = self._parse_table(schema=True) 2324 2325 options = [] 2326 while self._match_texts(("INCLUDING", "EXCLUDING")): 2327 this = self._prev.text.upper() 2328 2329 id_var = self._parse_id_var() 2330 if not id_var: 2331 return None 2332 2333 options.append( 2334 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2335 ) 2336 2337 return self.expression(exp.LikeProperty, this=table, expressions=options) 2338 2339 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2340 return self.expression( 2341 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2342 ) 2343 2344 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2345 self._match(TokenType.EQ) 2346 return self.expression( 2347 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2348 ) 2349 2350 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2351 self._match_text_seq("WITH", "CONNECTION") 2352 return self.expression( 2353 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2354 ) 2355 2356 def _parse_returns(self) -> exp.ReturnsProperty: 2357 value: t.Optional[exp.Expression] 2358 null = None 2359 is_table = self._match(TokenType.TABLE) 2360 2361 if is_table: 2362 if self._match(TokenType.LT): 2363 value = self.expression( 2364 exp.Schema, 2365 this="TABLE", 2366 expressions=self._parse_csv(self._parse_struct_types), 2367 ) 2368 if not self._match(TokenType.GT): 2369 self.raise_error("Expecting >") 2370 else: 2371 value = self._parse_schema(exp.var("TABLE")) 2372 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2373 null = True 2374 value = None 2375 else: 2376 value = self._parse_types() 2377 2378 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2379 2380 def _parse_describe(self) -> exp.Describe: 2381 kind = self._match_set(self.CREATABLES) and self._prev.text 2382 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2383 if self._match(TokenType.DOT): 2384 style = None 2385 self._retreat(self._index - 2) 2386 this = self._parse_table(schema=True) 2387 properties = self._parse_properties() 2388 expressions = properties.expressions if properties else None 2389 return self.expression( 2390 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2391 ) 2392 2393 def _parse_insert(self) -> exp.Insert: 2394 comments = ensure_list(self._prev_comments) 2395 hint = self._parse_hint() 2396 overwrite = self._match(TokenType.OVERWRITE) 2397 ignore = self._match(TokenType.IGNORE) 2398 local = self._match_text_seq("LOCAL") 2399 alternative = None 2400 is_function = None 2401 2402 if self._match_text_seq("DIRECTORY"): 2403 this: t.Optional[exp.Expression] = self.expression( 2404 exp.Directory, 2405 this=self._parse_var_or_string(), 2406 
local=local, 2407 row_format=self._parse_row_format(match_row=True), 2408 ) 2409 else: 2410 if self._match(TokenType.OR): 2411 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2412 2413 self._match(TokenType.INTO) 2414 comments += ensure_list(self._prev_comments) 2415 self._match(TokenType.TABLE) 2416 is_function = self._match(TokenType.FUNCTION) 2417 2418 this = ( 2419 self._parse_table(schema=True, parse_partition=True) 2420 if not is_function 2421 else self._parse_function() 2422 ) 2423 2424 returning = self._parse_returning() 2425 2426 return self.expression( 2427 exp.Insert, 2428 comments=comments, 2429 hint=hint, 2430 is_function=is_function, 2431 this=this, 2432 stored=self._match_text_seq("STORED") and self._parse_stored(), 2433 by_name=self._match_text_seq("BY", "NAME"), 2434 exists=self._parse_exists(), 2435 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2436 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2437 conflict=self._parse_on_conflict(), 2438 returning=returning or self._parse_returning(), 2439 overwrite=overwrite, 2440 alternative=alternative, 2441 ignore=ignore, 2442 ) 2443 2444 def _parse_kill(self) -> exp.Kill: 2445 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2446 2447 return self.expression( 2448 exp.Kill, 2449 this=self._parse_primary(), 2450 kind=kind, 2451 ) 2452 2453 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2454 conflict = self._match_text_seq("ON", "CONFLICT") 2455 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2456 2457 if not conflict and not duplicate: 2458 return None 2459 2460 conflict_keys = None 2461 constraint = None 2462 2463 if conflict: 2464 if self._match_text_seq("ON", "CONSTRAINT"): 2465 constraint = self._parse_id_var() 2466 elif self._match(TokenType.L_PAREN): 2467 conflict_keys = self._parse_csv(self._parse_id_var) 2468 self._match_r_paren() 2469 2470 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2471 if self._prev.token_type == TokenType.UPDATE: 2472 self._match(TokenType.SET) 2473 expressions = self._parse_csv(self._parse_equality) 2474 else: 2475 expressions = None 2476 2477 return self.expression( 2478 exp.OnConflict, 2479 duplicate=duplicate, 2480 expressions=expressions, 2481 action=action, 2482 conflict_keys=conflict_keys, 2483 constraint=constraint, 2484 ) 2485 2486 def _parse_returning(self) -> t.Optional[exp.Returning]: 2487 if not self._match(TokenType.RETURNING): 2488 return None 2489 return self.expression( 2490 exp.Returning, 2491 expressions=self._parse_csv(self._parse_expression), 2492 into=self._match(TokenType.INTO) and self._parse_table_part(), 2493 ) 2494 2495 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2496 if not self._match(TokenType.FORMAT): 2497 return None 2498 return self._parse_row_format() 2499 2500 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2501 index = self._index 2502 with_ = with_ or self._match_text_seq("WITH") 2503 2504 if not self._match(TokenType.SERDE_PROPERTIES): 2505 self._retreat(index) 2506 return None 2507 return self.expression( 2508 exp.SerdeProperties, 2509 **{ # type: ignore 2510 "expressions": self._parse_wrapped_properties(), 2511 "with": with_, 2512 }, 2513 ) 2514 2515 def _parse_row_format( 2516 self, match_row: bool = False 2517 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2518 
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2519 return None 2520 2521 if self._match_text_seq("SERDE"): 2522 this = self._parse_string() 2523 2524 serde_properties = self._parse_serde_properties() 2525 2526 return self.expression( 2527 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2528 ) 2529 2530 self._match_text_seq("DELIMITED") 2531 2532 kwargs = {} 2533 2534 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2535 kwargs["fields"] = self._parse_string() 2536 if self._match_text_seq("ESCAPED", "BY"): 2537 kwargs["escaped"] = self._parse_string() 2538 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2539 kwargs["collection_items"] = self._parse_string() 2540 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2541 kwargs["map_keys"] = self._parse_string() 2542 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2543 kwargs["lines"] = self._parse_string() 2544 if self._match_text_seq("NULL", "DEFINED", "AS"): 2545 kwargs["null"] = self._parse_string() 2546 2547 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2548 2549 def _parse_load(self) -> exp.LoadData | exp.Command: 2550 if self._match_text_seq("DATA"): 2551 local = self._match_text_seq("LOCAL") 2552 self._match_text_seq("INPATH") 2553 inpath = self._parse_string() 2554 overwrite = self._match(TokenType.OVERWRITE) 2555 self._match_pair(TokenType.INTO, TokenType.TABLE) 2556 2557 return self.expression( 2558 exp.LoadData, 2559 this=self._parse_table(schema=True), 2560 local=local, 2561 overwrite=overwrite, 2562 inpath=inpath, 2563 partition=self._parse_partition(), 2564 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2565 serde=self._match_text_seq("SERDE") and self._parse_string(), 2566 ) 2567 return self._parse_as_command(self._prev) 2568 2569 def _parse_delete(self) -> exp.Delete: 2570 # This handles MySQL's "Multiple-Table Syntax" 2571 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2572 tables = None 2573 comments = self._prev_comments 2574 if not self._match(TokenType.FROM, advance=False): 2575 tables = self._parse_csv(self._parse_table) or None 2576 2577 returning = self._parse_returning() 2578 2579 return self.expression( 2580 exp.Delete, 2581 comments=comments, 2582 tables=tables, 2583 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2584 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2585 where=self._parse_where(), 2586 returning=returning or self._parse_returning(), 2587 limit=self._parse_limit(), 2588 ) 2589 2590 def _parse_update(self) -> exp.Update: 2591 comments = self._prev_comments 2592 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2593 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2594 returning = self._parse_returning() 2595 return self.expression( 2596 exp.Update, 2597 comments=comments, 2598 **{ # type: ignore 2599 "this": this, 2600 "expressions": expressions, 2601 "from": self._parse_from(joins=True), 2602 "where": self._parse_where(), 2603 "returning": returning or self._parse_returning(), 2604 "order": self._parse_order(), 2605 "limit": self._parse_limit(), 2606 }, 2607 ) 2608 2609 def _parse_uncache(self) -> exp.Uncache: 2610 if not self._match(TokenType.TABLE): 2611 self.raise_error("Expecting TABLE after UNCACHE") 2612 2613 return self.expression( 2614 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2615 ) 2616 2617 def 
_parse_cache(self) -> exp.Cache: 2618 lazy = self._match_text_seq("LAZY") 2619 self._match(TokenType.TABLE) 2620 table = self._parse_table(schema=True) 2621 2622 options = [] 2623 if self._match_text_seq("OPTIONS"): 2624 self._match_l_paren() 2625 k = self._parse_string() 2626 self._match(TokenType.EQ) 2627 v = self._parse_string() 2628 options = [k, v] 2629 self._match_r_paren() 2630 2631 self._match(TokenType.ALIAS) 2632 return self.expression( 2633 exp.Cache, 2634 this=table, 2635 lazy=lazy, 2636 options=options, 2637 expression=self._parse_select(nested=True), 2638 ) 2639 2640 def _parse_partition(self) -> t.Optional[exp.Partition]: 2641 if not self._match(TokenType.PARTITION): 2642 return None 2643 2644 return self.expression( 2645 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2646 ) 2647 2648 def _parse_value(self) -> t.Optional[exp.Tuple]: 2649 if self._match(TokenType.L_PAREN): 2650 expressions = self._parse_csv(self._parse_expression) 2651 self._match_r_paren() 2652 return self.expression(exp.Tuple, expressions=expressions) 2653 2654 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2655 expression = self._parse_expression() 2656 if expression: 2657 return self.expression(exp.Tuple, expressions=[expression]) 2658 return None 2659 2660 def _parse_projections(self) -> t.List[exp.Expression]: 2661 return self._parse_expressions() 2662 2663 def _parse_select( 2664 self, 2665 nested: bool = False, 2666 table: bool = False, 2667 parse_subquery_alias: bool = True, 2668 parse_set_operation: bool = True, 2669 ) -> t.Optional[exp.Expression]: 2670 cte = self._parse_with() 2671 2672 if cte: 2673 this = self._parse_statement() 2674 2675 if not this: 2676 self.raise_error("Failed to parse any statement following CTE") 2677 return cte 2678 2679 if "with" in this.arg_types: 2680 this.set("with", cte) 2681 else: 2682 self.raise_error(f"{this.key} does not support CTE") 2683 this = cte 2684 2685 return this 2686 2687 # duckdb supports leading with FROM x 2688 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2689 2690 if self._match(TokenType.SELECT): 2691 comments = self._prev_comments 2692 2693 hint = self._parse_hint() 2694 all_ = self._match(TokenType.ALL) 2695 distinct = self._match_set(self.DISTINCT_TOKENS) 2696 2697 kind = ( 2698 self._match(TokenType.ALIAS) 2699 and self._match_texts(("STRUCT", "VALUE")) 2700 and self._prev.text.upper() 2701 ) 2702 2703 if distinct: 2704 distinct = self.expression( 2705 exp.Distinct, 2706 on=self._parse_value() if self._match(TokenType.ON) else None, 2707 ) 2708 2709 if all_ and distinct: 2710 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2711 2712 limit = self._parse_limit(top=True) 2713 projections = self._parse_projections() 2714 2715 this = self.expression( 2716 exp.Select, 2717 kind=kind, 2718 hint=hint, 2719 distinct=distinct, 2720 expressions=projections, 2721 limit=limit, 2722 ) 2723 this.comments = comments 2724 2725 into = self._parse_into() 2726 if into: 2727 this.set("into", into) 2728 2729 if not from_: 2730 from_ = self._parse_from() 2731 2732 if from_: 2733 this.set("from", from_) 2734 2735 this = self._parse_query_modifiers(this) 2736 elif (table or nested) and self._match(TokenType.L_PAREN): 2737 if self._match(TokenType.PIVOT): 2738 this = self._parse_simplified_pivot() 2739 elif self._match(TokenType.FROM): 2740 this = exp.select("*").from_( 2741 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2742 ) 2743 else: 2744 
this = ( 2745 self._parse_table() 2746 if table 2747 else self._parse_select(nested=True, parse_set_operation=False) 2748 ) 2749 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2750 2751 self._match_r_paren() 2752 2753 # We return early here so that the UNION isn't attached to the subquery by the 2754 # following call to _parse_set_operations, but instead becomes the parent node 2755 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2756 elif self._match(TokenType.VALUES, advance=False): 2757 this = self._parse_derived_table_values() 2758 elif from_: 2759 this = exp.select("*").from_(from_.this, copy=False) 2760 else: 2761 this = None 2762 2763 if parse_set_operation: 2764 return self._parse_set_operations(this) 2765 return this 2766 2767 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2768 if not skip_with_token and not self._match(TokenType.WITH): 2769 return None 2770 2771 comments = self._prev_comments 2772 recursive = self._match(TokenType.RECURSIVE) 2773 2774 expressions = [] 2775 while True: 2776 expressions.append(self._parse_cte()) 2777 2778 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2779 break 2780 else: 2781 self._match(TokenType.WITH) 2782 2783 return self.expression( 2784 exp.With, comments=comments, expressions=expressions, recursive=recursive 2785 ) 2786 2787 def _parse_cte(self) -> exp.CTE: 2788 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2789 if not alias or not alias.this: 2790 self.raise_error("Expected CTE to have alias") 2791 2792 self._match(TokenType.ALIAS) 2793 2794 if self._match_text_seq("NOT", "MATERIALIZED"): 2795 materialized = False 2796 elif self._match_text_seq("MATERIALIZED"): 2797 materialized = True 2798 else: 2799 materialized = None 2800 2801 return self.expression( 2802 exp.CTE, 2803 this=self._parse_wrapped(self._parse_statement), 2804 alias=alias, 2805 materialized=materialized, 2806 ) 2807 2808 def _parse_table_alias( 2809 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2810 ) -> t.Optional[exp.TableAlias]: 2811 any_token = self._match(TokenType.ALIAS) 2812 alias = ( 2813 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2814 or self._parse_string_as_identifier() 2815 ) 2816 2817 index = self._index 2818 if self._match(TokenType.L_PAREN): 2819 columns = self._parse_csv(self._parse_function_parameter) 2820 self._match_r_paren() if columns else self._retreat(index) 2821 else: 2822 columns = None 2823 2824 if not alias and not columns: 2825 return None 2826 2827 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2828 2829 # We bubble up comments from the Identifier to the TableAlias 2830 if isinstance(alias, exp.Identifier): 2831 table_alias.add_comments(alias.pop_comments()) 2832 2833 return table_alias 2834 2835 def _parse_subquery( 2836 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2837 ) -> t.Optional[exp.Subquery]: 2838 if not this: 2839 return None 2840 2841 return self.expression( 2842 exp.Subquery, 2843 this=this, 2844 pivots=self._parse_pivots(), 2845 alias=self._parse_table_alias() if parse_alias else None, 2846 ) 2847 2848 def _implicit_unnests_to_explicit(self, this: E) -> E: 2849 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2850 2851 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2852 for i, join in enumerate(this.args.get("joins") or []): 2853 table = join.this 2854 
normalized_table = table.copy() 2855 normalized_table.meta["maybe_column"] = True 2856 normalized_table = _norm(normalized_table, dialect=self.dialect) 2857 2858 if isinstance(table, exp.Table) and not join.args.get("on"): 2859 if normalized_table.parts[0].name in refs: 2860 table_as_column = table.to_column() 2861 unnest = exp.Unnest(expressions=[table_as_column]) 2862 2863 # Table.to_column creates a parent Alias node that we want to convert to 2864 # a TableAlias and attach to the Unnest, so it matches the parser's output 2865 if isinstance(table.args.get("alias"), exp.TableAlias): 2866 table_as_column.replace(table_as_column.this) 2867 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2868 2869 table.replace(unnest) 2870 2871 refs.add(normalized_table.alias_or_name) 2872 2873 return this 2874 2875 def _parse_query_modifiers( 2876 self, this: t.Optional[exp.Expression] 2877 ) -> t.Optional[exp.Expression]: 2878 if isinstance(this, (exp.Query, exp.Table)): 2879 for join in self._parse_joins(): 2880 this.append("joins", join) 2881 for lateral in iter(self._parse_lateral, None): 2882 this.append("laterals", lateral) 2883 2884 while True: 2885 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2886 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2887 key, expression = parser(self) 2888 2889 if expression: 2890 this.set(key, expression) 2891 if key == "limit": 2892 offset = expression.args.pop("offset", None) 2893 2894 if offset: 2895 offset = exp.Offset(expression=offset) 2896 this.set("offset", offset) 2897 2898 limit_by_expressions = expression.expressions 2899 expression.set("expressions", None) 2900 offset.set("expressions", limit_by_expressions) 2901 continue 2902 break 2903 2904 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2905 this = self._implicit_unnests_to_explicit(this) 2906 2907 return this 2908 2909 def _parse_hint(self) -> t.Optional[exp.Hint]: 2910 if self._match(TokenType.HINT): 2911 hints = [] 2912 for hint in iter( 2913 lambda: self._parse_csv( 2914 lambda: self._parse_function() or self._parse_var(upper=True) 2915 ), 2916 [], 2917 ): 2918 hints.extend(hint) 2919 2920 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2921 self.raise_error("Expected */ after HINT") 2922 2923 return self.expression(exp.Hint, expressions=hints) 2924 2925 return None 2926 2927 def _parse_into(self) -> t.Optional[exp.Into]: 2928 if not self._match(TokenType.INTO): 2929 return None 2930 2931 temp = self._match(TokenType.TEMPORARY) 2932 unlogged = self._match_text_seq("UNLOGGED") 2933 self._match(TokenType.TABLE) 2934 2935 return self.expression( 2936 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2937 ) 2938 2939 def _parse_from( 2940 self, joins: bool = False, skip_from_token: bool = False 2941 ) -> t.Optional[exp.From]: 2942 if not skip_from_token and not self._match(TokenType.FROM): 2943 return None 2944 2945 return self.expression( 2946 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2947 ) 2948 2949 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2950 return self.expression( 2951 exp.MatchRecognizeMeasure, 2952 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2953 this=self._parse_expression(), 2954 ) 2955 2956 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2957 if not self._match(TokenType.MATCH_RECOGNIZE): 2958 return None 2959 2960 self._match_l_paren() 2961 2962 partition = 
self._parse_partition_by() 2963 order = self._parse_order() 2964 2965 measures = ( 2966 self._parse_csv(self._parse_match_recognize_measure) 2967 if self._match_text_seq("MEASURES") 2968 else None 2969 ) 2970 2971 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2972 rows = exp.var("ONE ROW PER MATCH") 2973 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2974 text = "ALL ROWS PER MATCH" 2975 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2976 text += " SHOW EMPTY MATCHES" 2977 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2978 text += " OMIT EMPTY MATCHES" 2979 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2980 text += " WITH UNMATCHED ROWS" 2981 rows = exp.var(text) 2982 else: 2983 rows = None 2984 2985 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2986 text = "AFTER MATCH SKIP" 2987 if self._match_text_seq("PAST", "LAST", "ROW"): 2988 text += " PAST LAST ROW" 2989 elif self._match_text_seq("TO", "NEXT", "ROW"): 2990 text += " TO NEXT ROW" 2991 elif self._match_text_seq("TO", "FIRST"): 2992 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2993 elif self._match_text_seq("TO", "LAST"): 2994 text += f" TO LAST {self._advance_any().text}" # type: ignore 2995 after = exp.var(text) 2996 else: 2997 after = None 2998 2999 if self._match_text_seq("PATTERN"): 3000 self._match_l_paren() 3001 3002 if not self._curr: 3003 self.raise_error("Expecting )", self._curr) 3004 3005 paren = 1 3006 start = self._curr 3007 3008 while self._curr and paren > 0: 3009 if self._curr.token_type == TokenType.L_PAREN: 3010 paren += 1 3011 if self._curr.token_type == TokenType.R_PAREN: 3012 paren -= 1 3013 3014 end = self._prev 3015 self._advance() 3016 3017 if paren > 0: 3018 self.raise_error("Expecting )", self._curr) 3019 3020 pattern = exp.var(self._find_sql(start, end)) 3021 else: 3022 pattern = None 3023 3024 define = ( 3025 self._parse_csv(self._parse_name_as_expression) 3026 if self._match_text_seq("DEFINE") 3027 else None 3028 ) 3029 3030 self._match_r_paren() 3031 3032 return self.expression( 3033 exp.MatchRecognize, 3034 partition_by=partition, 3035 order=order, 3036 measures=measures, 3037 rows=rows, 3038 after=after, 3039 pattern=pattern, 3040 define=define, 3041 alias=self._parse_table_alias(), 3042 ) 3043 3044 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3045 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3046 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3047 cross_apply = False 3048 3049 if cross_apply is not None: 3050 this = self._parse_select(table=True) 3051 view = None 3052 outer = None 3053 elif self._match(TokenType.LATERAL): 3054 this = self._parse_select(table=True) 3055 view = self._match(TokenType.VIEW) 3056 outer = self._match(TokenType.OUTER) 3057 else: 3058 return None 3059 3060 if not this: 3061 this = ( 3062 self._parse_unnest() 3063 or self._parse_function() 3064 or self._parse_id_var(any_token=False) 3065 ) 3066 3067 while self._match(TokenType.DOT): 3068 this = exp.Dot( 3069 this=this, 3070 expression=self._parse_function() or self._parse_id_var(any_token=False), 3071 ) 3072 3073 if view: 3074 table = self._parse_id_var(any_token=False) 3075 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3076 table_alias: t.Optional[exp.TableAlias] = self.expression( 3077 exp.TableAlias, this=table, columns=columns 3078 ) 3079 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3080 # We move the alias from the lateral's child node to 
the lateral itself 3081 table_alias = this.args["alias"].pop() 3082 else: 3083 table_alias = self._parse_table_alias() 3084 3085 return self.expression( 3086 exp.Lateral, 3087 this=this, 3088 view=view, 3089 outer=outer, 3090 alias=table_alias, 3091 cross_apply=cross_apply, 3092 ) 3093 3094 def _parse_join_parts( 3095 self, 3096 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3097 return ( 3098 self._match_set(self.JOIN_METHODS) and self._prev, 3099 self._match_set(self.JOIN_SIDES) and self._prev, 3100 self._match_set(self.JOIN_KINDS) and self._prev, 3101 ) 3102 3103 def _parse_join( 3104 self, skip_join_token: bool = False, parse_bracket: bool = False 3105 ) -> t.Optional[exp.Join]: 3106 if self._match(TokenType.COMMA): 3107 return self.expression(exp.Join, this=self._parse_table()) 3108 3109 index = self._index 3110 method, side, kind = self._parse_join_parts() 3111 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3112 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3113 3114 if not skip_join_token and not join: 3115 self._retreat(index) 3116 kind = None 3117 method = None 3118 side = None 3119 3120 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3121 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3122 3123 if not skip_join_token and not join and not outer_apply and not cross_apply: 3124 return None 3125 3126 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3127 3128 if method: 3129 kwargs["method"] = method.text 3130 if side: 3131 kwargs["side"] = side.text 3132 if kind: 3133 kwargs["kind"] = kind.text 3134 if hint: 3135 kwargs["hint"] = hint 3136 3137 if self._match(TokenType.MATCH_CONDITION): 3138 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3139 3140 if self._match(TokenType.ON): 3141 kwargs["on"] = self._parse_assignment() 3142 elif self._match(TokenType.USING): 3143 kwargs["using"] = self._parse_wrapped_id_vars() 3144 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3145 kind and kind.token_type == TokenType.CROSS 3146 ): 3147 index = self._index 3148 joins: t.Optional[list] = list(self._parse_joins()) 3149 3150 if joins and self._match(TokenType.ON): 3151 kwargs["on"] = self._parse_assignment() 3152 elif joins and self._match(TokenType.USING): 3153 kwargs["using"] = self._parse_wrapped_id_vars() 3154 else: 3155 joins = None 3156 self._retreat(index) 3157 3158 kwargs["this"].set("joins", joins if joins else None) 3159 3160 comments = [c for token in (method, side, kind) if token for c in token.comments] 3161 return self.expression(exp.Join, comments=comments, **kwargs) 3162 3163 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3164 this = self._parse_assignment() 3165 3166 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3167 return this 3168 3169 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3170 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3171 3172 return this 3173 3174 def _parse_index_params(self) -> exp.IndexParameters: 3175 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3176 3177 if self._match(TokenType.L_PAREN, advance=False): 3178 columns = self._parse_wrapped_csv(self._parse_with_operator) 3179 else: 3180 columns = None 3181 3182 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3183 partition_by = 
self._parse_partition_by() 3184 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3185 tablespace = ( 3186 self._parse_var(any_token=True) 3187 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3188 else None 3189 ) 3190 where = self._parse_where() 3191 3192 on = self._parse_field() if self._match(TokenType.ON) else None 3193 3194 return self.expression( 3195 exp.IndexParameters, 3196 using=using, 3197 columns=columns, 3198 include=include, 3199 partition_by=partition_by, 3200 where=where, 3201 with_storage=with_storage, 3202 tablespace=tablespace, 3203 on=on, 3204 ) 3205 3206 def _parse_index( 3207 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3208 ) -> t.Optional[exp.Index]: 3209 if index or anonymous: 3210 unique = None 3211 primary = None 3212 amp = None 3213 3214 self._match(TokenType.ON) 3215 self._match(TokenType.TABLE) # hive 3216 table = self._parse_table_parts(schema=True) 3217 else: 3218 unique = self._match(TokenType.UNIQUE) 3219 primary = self._match_text_seq("PRIMARY") 3220 amp = self._match_text_seq("AMP") 3221 3222 if not self._match(TokenType.INDEX): 3223 return None 3224 3225 index = self._parse_id_var() 3226 table = None 3227 3228 params = self._parse_index_params() 3229 3230 return self.expression( 3231 exp.Index, 3232 this=index, 3233 table=table, 3234 unique=unique, 3235 primary=primary, 3236 amp=amp, 3237 params=params, 3238 ) 3239 3240 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3241 hints: t.List[exp.Expression] = [] 3242 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3243 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3244 hints.append( 3245 self.expression( 3246 exp.WithTableHint, 3247 expressions=self._parse_csv( 3248 lambda: self._parse_function() or self._parse_var(any_token=True) 3249 ), 3250 ) 3251 ) 3252 self._match_r_paren() 3253 else: 3254 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3255 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3256 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3257 3258 self._match_set((TokenType.INDEX, TokenType.KEY)) 3259 if self._match(TokenType.FOR): 3260 hint.set("target", self._advance_any() and self._prev.text.upper()) 3261 3262 hint.set("expressions", self._parse_wrapped_id_vars()) 3263 hints.append(hint) 3264 3265 return hints or None 3266 3267 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3268 return ( 3269 (not schema and self._parse_function(optional_parens=False)) 3270 or self._parse_id_var(any_token=False) 3271 or self._parse_string_as_identifier() 3272 or self._parse_placeholder() 3273 ) 3274 3275 def _parse_table_parts( 3276 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3277 ) -> exp.Table: 3278 catalog = None 3279 db = None 3280 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3281 3282 while self._match(TokenType.DOT): 3283 if catalog: 3284 # This allows nesting the table in arbitrarily many dot expressions if needed 3285 table = self.expression( 3286 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3287 ) 3288 else: 3289 catalog = db 3290 db = table 3291 # "" used for tsql FROM a..b case 3292 table = self._parse_table_part(schema=schema) or "" 3293 3294 if ( 3295 wildcard 3296 and self._is_connected() 3297 and (isinstance(table, exp.Identifier) or not table) 3298 and self._match(TokenType.STAR) 3299 ): 3300 if 
isinstance(table, exp.Identifier): 3301 table.args["this"] += "*" 3302 else: 3303 table = exp.Identifier(this="*") 3304 3305 # We bubble up comments from the Identifier to the Table 3306 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3307 3308 if is_db_reference: 3309 catalog = db 3310 db = table 3311 table = None 3312 3313 if not table and not is_db_reference: 3314 self.raise_error(f"Expected table name but got {self._curr}") 3315 if not db and is_db_reference: 3316 self.raise_error(f"Expected database name but got {self._curr}") 3317 3318 return self.expression( 3319 exp.Table, 3320 comments=comments, 3321 this=table, 3322 db=db, 3323 catalog=catalog, 3324 pivots=self._parse_pivots(), 3325 ) 3326 3327 def _parse_table( 3328 self, 3329 schema: bool = False, 3330 joins: bool = False, 3331 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3332 parse_bracket: bool = False, 3333 is_db_reference: bool = False, 3334 parse_partition: bool = False, 3335 ) -> t.Optional[exp.Expression]: 3336 lateral = self._parse_lateral() 3337 if lateral: 3338 return lateral 3339 3340 unnest = self._parse_unnest() 3341 if unnest: 3342 return unnest 3343 3344 values = self._parse_derived_table_values() 3345 if values: 3346 return values 3347 3348 subquery = self._parse_select(table=True) 3349 if subquery: 3350 if not subquery.args.get("pivots"): 3351 subquery.set("pivots", self._parse_pivots()) 3352 return subquery 3353 3354 bracket = parse_bracket and self._parse_bracket(None) 3355 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3356 3357 only = self._match(TokenType.ONLY) 3358 3359 this = t.cast( 3360 exp.Expression, 3361 bracket 3362 or self._parse_bracket( 3363 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3364 ), 3365 ) 3366 3367 if only: 3368 this.set("only", only) 3369 3370 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3371 self._match_text_seq("*") 3372 3373 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3374 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3375 this.set("partition", self._parse_partition()) 3376 3377 if schema: 3378 return self._parse_schema(this=this) 3379 3380 version = self._parse_version() 3381 3382 if version: 3383 this.set("version", version) 3384 3385 if self.dialect.ALIAS_POST_TABLESAMPLE: 3386 table_sample = self._parse_table_sample() 3387 3388 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3389 if alias: 3390 this.set("alias", alias) 3391 3392 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3393 return self.expression( 3394 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3395 ) 3396 3397 this.set("hints", self._parse_table_hints()) 3398 3399 if not this.args.get("pivots"): 3400 this.set("pivots", self._parse_pivots()) 3401 3402 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3403 table_sample = self._parse_table_sample() 3404 3405 if table_sample: 3406 table_sample.set("this", this) 3407 this = table_sample 3408 3409 if joins: 3410 for join in self._parse_joins(): 3411 this.append("joins", join) 3412 3413 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3414 this.set("ordinality", True) 3415 this.set("alias", self._parse_table_alias()) 3416 3417 return this 3418 3419 def _parse_version(self) -> t.Optional[exp.Version]: 3420 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3421 this = "TIMESTAMP" 3422 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3423 this = "VERSION" 3424 else: 3425 return None 3426 3427 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3428 kind = self._prev.text.upper() 3429 start = self._parse_bitwise() 3430 self._match_texts(("TO", "AND")) 3431 end = self._parse_bitwise() 3432 expression: t.Optional[exp.Expression] = self.expression( 3433 exp.Tuple, expressions=[start, end] 3434 ) 3435 elif self._match_text_seq("CONTAINED", "IN"): 3436 kind = "CONTAINED IN" 3437 expression = self.expression( 3438 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3439 ) 3440 elif self._match(TokenType.ALL): 3441 kind = "ALL" 3442 expression = None 3443 else: 3444 self._match_text_seq("AS", "OF") 3445 kind = "AS OF" 3446 expression = self._parse_type() 3447 3448 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3449 3450 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3451 if not self._match(TokenType.UNNEST): 3452 return None 3453 3454 expressions = self._parse_wrapped_csv(self._parse_equality) 3455 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3456 3457 alias = self._parse_table_alias() if with_alias else None 3458 3459 if alias: 3460 if self.dialect.UNNEST_COLUMN_ONLY: 3461 if alias.args.get("columns"): 3462 self.raise_error("Unexpected extra column alias in unnest.") 3463 3464 alias.set("columns", [alias.this]) 3465 alias.set("this", None) 3466 3467 columns = alias.args.get("columns") or [] 3468 if offset and len(expressions) < len(columns): 3469 offset = columns.pop() 3470 3471 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3472 self._match(TokenType.ALIAS) 3473 offset = self._parse_id_var( 3474 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3475 ) or exp.to_identifier("offset") 3476 3477 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3478 3479 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3480 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3481 if not is_derived and not self._match_text_seq("VALUES"): 3482 return None 3483 3484 expressions = self._parse_csv(self._parse_value) 3485 alias = self._parse_table_alias() 3486 3487 if is_derived: 3488 self._match_r_paren() 3489 3490 return self.expression( 3491 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3492 ) 3493
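# --- Usage sketch (editorial note, not part of the library source): a parenthesized
# VALUES clause parsed by _parse_derived_table_values becomes an exp.Values node whose
# table alias and column aliases are preserved:
#
#   >>> from sqlglot import exp, parse_one
#   >>> values = parse_one("SELECT * FROM (VALUES (1), (2)) AS v(x)").find(exp.Values)
#   >>> values.alias
#   'v'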
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3528 percent = num 3529 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3530 size = num 3531 else: 3532 percent = num 3533 3534 if matched_l_paren: 3535 self._match_r_paren() 3536 3537 if self._match(TokenType.L_PAREN): 3538 method = self._parse_var(upper=True) 3539 seed = self._match(TokenType.COMMA) and self._parse_number() 3540 self._match_r_paren() 3541 elif self._match_texts(("SEED", "REPEATABLE")): 3542 seed = self._parse_wrapped(self._parse_number) 3543 3544 if not method and self.DEFAULT_SAMPLING_METHOD: 3545 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3546 3547 return self.expression( 3548 exp.TableSample, 3549 expressions=expressions, 3550 method=method, 3551 bucket_numerator=bucket_numerator, 3552 bucket_denominator=bucket_denominator, 3553 bucket_field=bucket_field, 3554 percent=percent, 3555 size=size, 3556 seed=seed, 3557 ) 3558 3559 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3560 return list(iter(self._parse_pivot, None)) or None 3561 3562 def _parse_joins(self) -> t.Iterator[exp.Join]: 3563 return iter(self._parse_join, None) 3564 3565 # https://duckdb.org/docs/sql/statements/pivot 3566 def _parse_simplified_pivot(self) -> exp.Pivot: 3567 def _parse_on() -> t.Optional[exp.Expression]: 3568 this = self._parse_bitwise() 3569 return self._parse_in(this) if self._match(TokenType.IN) else this 3570 3571 this = self._parse_table() 3572 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3573 using = self._match(TokenType.USING) and self._parse_csv( 3574 lambda: self._parse_alias(self._parse_function()) 3575 ) 3576 group = self._parse_group() 3577 return self.expression( 3578 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3579 ) 3580 3581 def _parse_pivot_in(self) -> exp.In: 3582 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3583 this = self._parse_assignment() 3584 3585 self._match(TokenType.ALIAS) 3586 alias = self._parse_field() 3587 if alias: 3588 return self.expression(exp.PivotAlias, this=this, alias=alias) 3589 3590 return this 3591 3592 value = self._parse_column() 3593 3594 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3595 self.raise_error("Expecting IN (") 3596 3597 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3598 3599 self._match_r_paren() 3600 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3601 3602 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3603 index = self._index 3604 include_nulls = None 3605 3606 if self._match(TokenType.PIVOT): 3607 unpivot = False 3608 elif self._match(TokenType.UNPIVOT): 3609 unpivot = True 3610 3611 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3612 if self._match_text_seq("INCLUDE", "NULLS"): 3613 include_nulls = True 3614 elif self._match_text_seq("EXCLUDE", "NULLS"): 3615 include_nulls = False 3616 else: 3617 return None 3618 3619 expressions = [] 3620 3621 if not self._match(TokenType.L_PAREN): 3622 self._retreat(index) 3623 return None 3624 3625 if unpivot: 3626 expressions = self._parse_csv(self._parse_column) 3627 else: 3628 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3629 3630 if not expressions: 3631 self.raise_error("Failed to parse PIVOT's aggregation list") 3632 3633 if not self._match(TokenType.FOR): 3634 self.raise_error("Expecting FOR") 3635 3636 field = self._parse_pivot_in() 3637 3638 self._match_r_paren() 
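# A sketch of the surface syntax being assembled at this point (illustrative,
# Snowflake/Spark-style; other dialects vary):
#
#   SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a' AS a, 'b' AS b)) AS p
#
# The aggregation list (`expressions`), the FOR column and its IN list
# (`field`) have all been consumed; the code below builds the exp.Pivot node,
# optionally attaches an alias, and for PIVOT (as opposed to UNPIVOT) derives
# the output column names.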
3639 3640 pivot = self.expression( 3641 exp.Pivot, 3642 expressions=expressions, 3643 field=field, 3644 unpivot=unpivot, 3645 include_nulls=include_nulls, 3646 ) 3647 3648 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3649 pivot.set("alias", self._parse_table_alias()) 3650 3651 if not unpivot: 3652 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3653 3654 columns: t.List[exp.Expression] = [] 3655 for fld in pivot.args["field"].expressions: 3656 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3657 for name in names: 3658 if self.PREFIXED_PIVOT_COLUMNS: 3659 name = f"{name}_{field_name}" if name else field_name 3660 else: 3661 name = f"{field_name}_{name}" if name else field_name 3662 3663 columns.append(exp.to_identifier(name)) 3664 3665 pivot.set("columns", columns) 3666 3667 return pivot 3668 3669 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3670 return [agg.alias for agg in aggregations] 3671 3672 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3673 if not skip_where_token and not self._match(TokenType.PREWHERE): 3674 return None 3675 3676 return self.expression( 3677 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3678 ) 3679 3680 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3681 if not skip_where_token and not self._match(TokenType.WHERE): 3682 return None 3683 3684 return self.expression( 3685 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3686 ) 3687 3688 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3689 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3690 return None 3691 3692 elements: t.Dict[str, t.Any] = defaultdict(list) 3693 3694 if self._match(TokenType.ALL): 3695 elements["all"] = True 3696 elif self._match(TokenType.DISTINCT): 3697 elements["all"] = False 3698 3699 while True: 3700 expressions = self._parse_csv( 3701 lambda: None 3702 if self._match(TokenType.ROLLUP, advance=False) 3703 else self._parse_assignment() 3704 ) 3705 if expressions: 3706 elements["expressions"].extend(expressions) 3707 3708 grouping_sets = self._parse_grouping_sets() 3709 if grouping_sets: 3710 elements["grouping_sets"].extend(grouping_sets) 3711 3712 rollup = None 3713 cube = None 3714 totals = None 3715 3716 index = self._index 3717 with_ = self._match(TokenType.WITH) 3718 if self._match(TokenType.ROLLUP): 3719 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3720 elements["rollup"].extend(ensure_list(rollup)) 3721 3722 if self._match(TokenType.CUBE): 3723 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3724 elements["cube"].extend(ensure_list(cube)) 3725 3726 if self._match_text_seq("TOTALS"): 3727 totals = True 3728 elements["totals"] = True # type: ignore 3729 3730 if not (grouping_sets or rollup or cube or totals): 3731 if with_: 3732 self._retreat(index) 3733 break 3734 3735 return self.expression(exp.Group, **elements) # type: ignore 3736 3737 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3738 if not self._match(TokenType.GROUPING_SETS): 3739 return None 3740 3741 return self._parse_wrapped_csv(self._parse_grouping_set) 3742 3743 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3744 if self._match(TokenType.L_PAREN): 3745 grouping_set = self._parse_csv(self._parse_column) 3746 self._match_r_paren() 3747 return 
self.expression(exp.Tuple, expressions=grouping_set) 3748 3749 return self._parse_column() 3750 3751 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3752 if not skip_having_token and not self._match(TokenType.HAVING): 3753 return None 3754 return self.expression(exp.Having, this=self._parse_assignment()) 3755 3756 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3757 if not self._match(TokenType.QUALIFY): 3758 return None 3759 return self.expression(exp.Qualify, this=self._parse_assignment()) 3760 3761 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3762 if skip_start_token: 3763 start = None 3764 elif self._match(TokenType.START_WITH): 3765 start = self._parse_assignment() 3766 else: 3767 return None 3768 3769 self._match(TokenType.CONNECT_BY) 3770 nocycle = self._match_text_seq("NOCYCLE") 3771 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3772 exp.Prior, this=self._parse_bitwise() 3773 ) 3774 connect = self._parse_assignment() 3775 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3776 3777 if not start and self._match(TokenType.START_WITH): 3778 start = self._parse_assignment() 3779 3780 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3781 3782 def _parse_name_as_expression(self) -> exp.Alias: 3783 return self.expression( 3784 exp.Alias, 3785 alias=self._parse_id_var(any_token=True), 3786 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3787 ) 3788 3789 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3790 if self._match_text_seq("INTERPOLATE"): 3791 return self._parse_wrapped_csv(self._parse_name_as_expression) 3792 return None 3793 3794 def _parse_order( 3795 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3796 ) -> t.Optional[exp.Expression]: 3797 siblings = None 3798 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3799 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3800 return this 3801 3802 siblings = True 3803 3804 return self.expression( 3805 exp.Order, 3806 this=this, 3807 expressions=self._parse_csv(self._parse_ordered), 3808 interpolate=self._parse_interpolate(), 3809 siblings=siblings, 3810 ) 3811 3812 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3813 if not self._match(token): 3814 return None 3815 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3816 3817 def _parse_ordered( 3818 self, parse_method: t.Optional[t.Callable] = None 3819 ) -> t.Optional[exp.Ordered]: 3820 this = parse_method() if parse_method else self._parse_assignment() 3821 if not this: 3822 return None 3823 3824 asc = self._match(TokenType.ASC) 3825 desc = self._match(TokenType.DESC) or (asc and False) 3826 3827 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3828 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3829 3830 nulls_first = is_nulls_first or False 3831 explicitly_null_ordered = is_nulls_first or is_nulls_last 3832 3833 if ( 3834 not explicitly_null_ordered 3835 and ( 3836 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3837 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3838 ) 3839 and self.dialect.NULL_ORDERING != "nulls_are_last" 3840 ): 3841 nulls_first = True 3842 3843 if self._match_text_seq("WITH", "FILL"): 3844 with_fill = self.expression( 3845 exp.WithFill, 3846 **{ # type: ignore 3847 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3848 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3849 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3850 }, 3851 ) 3852 else: 3853 with_fill = None 3854 3855 return self.expression( 3856 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3857 ) 3858 3859 def _parse_limit( 3860 self, 3861 this: t.Optional[exp.Expression] = None, 3862 top: bool = False, 3863 skip_limit_token: bool = False, 3864 ) -> t.Optional[exp.Expression]: 3865 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3866 comments = self._prev_comments 3867 if top: 3868 limit_paren = self._match(TokenType.L_PAREN) 3869 expression = self._parse_term() if limit_paren else self._parse_number() 3870 3871 if limit_paren: 3872 self._match_r_paren() 3873 else: 3874 expression = self._parse_term() 3875 3876 if self._match(TokenType.COMMA): 3877 offset = expression 3878 expression = self._parse_term() 3879 else: 3880 offset = None 3881 3882 limit_exp = self.expression( 3883 exp.Limit, 3884 this=this, 3885 expression=expression, 3886 offset=offset, 3887 comments=comments, 3888 expressions=self._parse_limit_by(), 3889 ) 3890 3891 return limit_exp 3892 3893 if self._match(TokenType.FETCH): 3894 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3895 direction = self._prev.text.upper() if direction else "FIRST" 3896 3897 count = self._parse_field(tokens=self.FETCH_TOKENS) 3898 percent = self._match(TokenType.PERCENT) 3899 3900 self._match_set((TokenType.ROW, TokenType.ROWS)) 3901 3902 only = self._match_text_seq("ONLY") 3903 with_ties = self._match_text_seq("WITH", "TIES") 3904 3905 if only and with_ties: 3906 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3907 3908 return self.expression( 3909 exp.Fetch, 3910 direction=direction, 3911 count=count, 3912 percent=percent, 3913 with_ties=with_ties, 3914 ) 3915 3916 return this 3917 3918 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3919 if not self._match(TokenType.OFFSET): 3920 return this 3921 3922 count = self._parse_term() 3923 self._match_set((TokenType.ROW, TokenType.ROWS)) 3924 3925 return self.expression( 3926 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3927 ) 3928 3929 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3930 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3931 3932 def _parse_locks(self) -> t.List[exp.Lock]: 3933 locks = [] 3934 while True: 3935 if self._match_text_seq("FOR", "UPDATE"): 3936 update = True 3937 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3938 "LOCK", "IN", "SHARE", "MODE" 3939 ): 3940 update = False 3941 else: 3942 break 3943 3944 expressions = None 3945 if self._match_text_seq("OF"): 3946 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3947 3948 wait: t.Optional[bool | exp.Expression] = None 3949 if self._match_text_seq("NOWAIT"): 3950 wait = True 3951 elif self._match_text_seq("WAIT"): 3952 wait = self._parse_primary() 3953 elif self._match_text_seq("SKIP", "LOCKED"): 3954 wait = False 3955 3956 locks.append( 3957 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3958 ) 3959 3960 return locks 3961 3962 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3963 while this and self._match_set(self.SET_OPERATIONS): 3964 token_type = self._prev.token_type 3965 3966 if token_type == TokenType.UNION: 3967 
operation: t.Type[exp.SetOperation] = exp.Union 3968 elif token_type == TokenType.EXCEPT: 3969 operation = exp.Except 3970 else: 3971 operation = exp.Intersect 3972 3973 comments = self._prev.comments 3974 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3975 by_name = self._match_text_seq("BY", "NAME") 3976 expression = self._parse_select(nested=True, parse_set_operation=False) 3977 3978 this = self.expression( 3979 operation, 3980 comments=comments, 3981 this=this, 3982 distinct=distinct, 3983 by_name=by_name, 3984 expression=expression, 3985 ) 3986 3987 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3988 expression = this.expression 3989 3990 if expression: 3991 for arg in self.SET_OP_MODIFIERS: 3992 expr = expression.args.get(arg) 3993 if expr: 3994 this.set(arg, expr.pop()) 3995 3996 return this 3997 3998 def _parse_expression(self) -> t.Optional[exp.Expression]: 3999 return self._parse_alias(self._parse_assignment()) 4000 4001 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4002 this = self._parse_disjunction() 4003 4004 while self._match_set(self.ASSIGNMENT): 4005 this = self.expression( 4006 self.ASSIGNMENT[self._prev.token_type], 4007 this=this, 4008 comments=self._prev_comments, 4009 expression=self._parse_assignment(), 4010 ) 4011 4012 return this 4013 4014 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4015 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4016 4017 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4018 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4019 4020 def _parse_equality(self) -> t.Optional[exp.Expression]: 4021 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4022 4023 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4024 return self._parse_tokens(self._parse_range, self.COMPARISON) 4025 4026 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4027 this = this or self._parse_bitwise() 4028 negate = self._match(TokenType.NOT) 4029 4030 if self._match_set(self.RANGE_PARSERS): 4031 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4032 if not expression: 4033 return this 4034 4035 this = expression 4036 elif self._match(TokenType.ISNULL): 4037 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4038 4039 # Postgres supports ISNULL and NOTNULL for conditions. 
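# For example (illustrative): `a ISNULL` is parsed above as `a IS NULL`, and
# `a NOTNULL` below as `NOT a IS NULL`, i.e. an exp.Not wrapping an exp.Is.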
4040 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4041 if self._match(TokenType.NOTNULL): 4042 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4043 this = self.expression(exp.Not, this=this) 4044 4045 if negate: 4046 this = self.expression(exp.Not, this=this) 4047 4048 if self._match(TokenType.IS): 4049 this = self._parse_is(this) 4050 4051 return this 4052 4053 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4054 index = self._index - 1 4055 negate = self._match(TokenType.NOT) 4056 4057 if self._match_text_seq("DISTINCT", "FROM"): 4058 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4059 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4060 4061 expression = self._parse_null() or self._parse_boolean() 4062 if not expression: 4063 self._retreat(index) 4064 return None 4065 4066 this = self.expression(exp.Is, this=this, expression=expression) 4067 return self.expression(exp.Not, this=this) if negate else this 4068 4069 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4070 unnest = self._parse_unnest(with_alias=False) 4071 if unnest: 4072 this = self.expression(exp.In, this=this, unnest=unnest) 4073 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4074 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4075 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4076 4077 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4078 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4079 else: 4080 this = self.expression(exp.In, this=this, expressions=expressions) 4081 4082 if matched_l_paren: 4083 self._match_r_paren(this) 4084 elif not self._match(TokenType.R_BRACKET, expression=this): 4085 self.raise_error("Expecting ]") 4086 else: 4087 this = self.expression(exp.In, this=this, field=self._parse_field()) 4088 4089 return this 4090 4091 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4092 low = self._parse_bitwise() 4093 self._match(TokenType.AND) 4094 high = self._parse_bitwise() 4095 return self.expression(exp.Between, this=this, low=low, high=high) 4096 4097 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4098 if not self._match(TokenType.ESCAPE): 4099 return this 4100 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4101 4102 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4103 index = self._index 4104 4105 if not self._match(TokenType.INTERVAL) and match_interval: 4106 return None 4107 4108 if self._match(TokenType.STRING, advance=False): 4109 this = self._parse_primary() 4110 else: 4111 this = self._parse_term() 4112 4113 if not this or ( 4114 isinstance(this, exp.Column) 4115 and not this.table 4116 and not this.this.quoted 4117 and this.name.upper() == "IS" 4118 ): 4119 self._retreat(index) 4120 return None 4121 4122 unit = self._parse_function() or ( 4123 not self._match(TokenType.ALIAS, advance=False) 4124 and self._parse_var(any_token=True, upper=True) 4125 ) 4126 4127 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4128 # each INTERVAL expression into this canonical form so it's easy to transpile 4129 if this and this.is_number: 4130 this = exp.Literal.string(this.name) 4131 elif this and this.is_string: 4132 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4133 if 
len(parts) == 1: 4134 if unit: 4135 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4136 self._retreat(self._index - 1) 4137 4138 this = exp.Literal.string(parts[0][0]) 4139 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4140 4141 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4142 unit = self.expression( 4143 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4144 ) 4145 4146 interval = self.expression(exp.Interval, this=this, unit=unit) 4147 4148 index = self._index 4149 self._match(TokenType.PLUS) 4150 4151 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4152 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4153 return self.expression( 4154 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4155 ) 4156 4157 self._retreat(index) 4158 return interval 4159 4160 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4161 this = self._parse_term() 4162 4163 while True: 4164 if self._match_set(self.BITWISE): 4165 this = self.expression( 4166 self.BITWISE[self._prev.token_type], 4167 this=this, 4168 expression=self._parse_term(), 4169 ) 4170 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4171 this = self.expression( 4172 exp.DPipe, 4173 this=this, 4174 expression=self._parse_term(), 4175 safe=not self.dialect.STRICT_STRING_CONCAT, 4176 ) 4177 elif self._match(TokenType.DQMARK): 4178 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4179 elif self._match_pair(TokenType.LT, TokenType.LT): 4180 this = self.expression( 4181 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4182 ) 4183 elif self._match_pair(TokenType.GT, TokenType.GT): 4184 this = self.expression( 4185 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4186 ) 4187 else: 4188 break 4189 4190 return this 4191 4192 def _parse_term(self) -> t.Optional[exp.Expression]: 4193 return self._parse_tokens(self._parse_factor, self.TERM) 4194 4195 def _parse_factor(self) -> t.Optional[exp.Expression]: 4196 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4197 this = parse_method() 4198 4199 while self._match_set(self.FACTOR): 4200 klass = self.FACTOR[self._prev.token_type] 4201 comments = self._prev_comments 4202 expression = parse_method() 4203 4204 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4205 self._retreat(self._index - 1) 4206 return this 4207 4208 this = self.expression(klass, this=this, comments=comments, expression=expression) 4209 4210 if isinstance(this, exp.Div): 4211 this.args["typed"] = self.dialect.TYPED_DIVISION 4212 this.args["safe"] = self.dialect.SAFE_DIVISION 4213 4214 return this 4215 4216 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4217 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4218 4219 def _parse_unary(self) -> t.Optional[exp.Expression]: 4220 if self._match_set(self.UNARY_PARSERS): 4221 return self.UNARY_PARSERS[self._prev.token_type](self) 4222 return self._parse_at_time_zone(self._parse_type()) 4223 4224 def _parse_type( 4225 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4226 ) -> t.Optional[exp.Expression]: 4227 interval = parse_interval and self._parse_interval() 4228 if interval: 4229 return interval 4230 4231 index = self._index 4232 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4233 4234 if data_type: 4235 index2 = 
self._index 4236 this = self._parse_primary() 4237 4238 if isinstance(this, exp.Literal): 4239 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4240 if parser: 4241 return parser(self, this, data_type) 4242 4243 return self.expression(exp.Cast, this=this, to=data_type) 4244 4245 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4246 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4247 # 4248 # If the index difference here is greater than 1, that means the parser itself must have 4249 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4250 # 4251 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4252 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4253 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4254 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4255 # 4256 # In these cases, we don't really want to return the converted type, but instead retreat 4257 # and try to parse a Column or Identifier in the section below. 4258 if data_type.expressions and index2 - index > 1: 4259 self._retreat(index2) 4260 return self._parse_column_ops(data_type) 4261 4262 self._retreat(index) 4263 4264 if fallback_to_identifier: 4265 return self._parse_id_var() 4266 4267 this = self._parse_column() 4268 return this and self._parse_column_ops(this) 4269 4270 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4271 this = self._parse_type() 4272 if not this: 4273 return None 4274 4275 if isinstance(this, exp.Column) and not this.table: 4276 this = exp.var(this.name.upper()) 4277 4278 return self.expression( 4279 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4280 ) 4281 4282 def _parse_types( 4283 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4284 ) -> t.Optional[exp.Expression]: 4285 index = self._index 4286 4287 this: t.Optional[exp.Expression] = None 4288 prefix = self._match_text_seq("SYSUDTLIB", ".") 4289 4290 if not self._match_set(self.TYPE_TOKENS): 4291 identifier = allow_identifiers and self._parse_id_var( 4292 any_token=False, tokens=(TokenType.VAR,) 4293 ) 4294 if identifier: 4295 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4296 4297 if len(tokens) != 1: 4298 self.raise_error("Unexpected identifier", self._prev) 4299 4300 if tokens[0].token_type in self.TYPE_TOKENS: 4301 self._prev = tokens[0] 4302 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4303 type_name = identifier.name 4304 4305 while self._match(TokenType.DOT): 4306 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4307 4308 this = exp.DataType.build(type_name, udt=True) 4309 else: 4310 self._retreat(self._index - 1) 4311 return None 4312 else: 4313 return None 4314 4315 type_token = self._prev.token_type 4316 4317 if type_token == TokenType.PSEUDO_TYPE: 4318 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4319 4320 if type_token == TokenType.OBJECT_IDENTIFIER: 4321 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4322 4323 # https://materialize.com/docs/sql/types/map/ 4324 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4325 key_type = self._parse_types( 4326 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4327 ) 4328 if not self._match(TokenType.FARROW): 4329 self._retreat(index)
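# No `=>` after the key type, so this is not a Materialize-style
# MAP[K => V]; we have backtracked to where type parsing started and
# report no match.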
4330 return None 4331 4332 value_type = self._parse_types( 4333 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4334 ) 4335 if not self._match(TokenType.R_BRACKET): 4336 self._retreat(index) 4337 return None 4338 4339 return exp.DataType( 4340 this=exp.DataType.Type.MAP, 4341 expressions=[key_type, value_type], 4342 nested=True, 4343 prefix=prefix, 4344 ) 4345 4346 nested = type_token in self.NESTED_TYPE_TOKENS 4347 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4348 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4349 expressions = None 4350 maybe_func = False 4351 4352 if self._match(TokenType.L_PAREN): 4353 if is_struct: 4354 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4355 elif nested: 4356 expressions = self._parse_csv( 4357 lambda: self._parse_types( 4358 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4359 ) 4360 ) 4361 elif type_token in self.ENUM_TYPE_TOKENS: 4362 expressions = self._parse_csv(self._parse_equality) 4363 elif is_aggregate: 4364 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4365 any_token=False, tokens=(TokenType.VAR,) 4366 ) 4367 if not func_or_ident or not self._match(TokenType.COMMA): 4368 return None 4369 expressions = self._parse_csv( 4370 lambda: self._parse_types( 4371 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4372 ) 4373 ) 4374 expressions.insert(0, func_or_ident) 4375 else: 4376 expressions = self._parse_csv(self._parse_type_size) 4377 4378 if not expressions or not self._match(TokenType.R_PAREN): 4379 self._retreat(index) 4380 return None 4381 4382 maybe_func = True 4383 4384 values: t.Optional[t.List[exp.Expression]] = None 4385 4386 if nested and self._match(TokenType.LT): 4387 if is_struct: 4388 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4389 else: 4390 expressions = self._parse_csv( 4391 lambda: self._parse_types( 4392 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4393 ) 4394 ) 4395 4396 if not self._match(TokenType.GT): 4397 self.raise_error("Expecting >") 4398 4399 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4400 values = self._parse_csv(self._parse_assignment) 4401 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4402 4403 if type_token in self.TIMESTAMPS: 4404 if self._match_text_seq("WITH", "TIME", "ZONE"): 4405 maybe_func = False 4406 tz_type = ( 4407 exp.DataType.Type.TIMETZ 4408 if type_token in self.TIMES 4409 else exp.DataType.Type.TIMESTAMPTZ 4410 ) 4411 this = exp.DataType(this=tz_type, expressions=expressions) 4412 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4413 maybe_func = False 4414 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4415 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4416 maybe_func = False 4417 elif type_token == TokenType.INTERVAL: 4418 unit = self._parse_var(upper=True) 4419 if unit: 4420 if self._match_text_seq("TO"): 4421 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4422 4423 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4424 else: 4425 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4426 4427 if maybe_func and check_func: 4428 index2 = self._index 4429 peek = self._parse_string() 4430 4431 if not peek: 4432 self._retreat(index) 4433 return None 4434 4435 self._retreat(index2) 4436 4437 if not this: 4438 if 
self._match_text_seq("UNSIGNED"): 4439 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4440 if not unsigned_type_token: 4441 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4442 4443 type_token = unsigned_type_token or type_token 4444 4445 this = exp.DataType( 4446 this=exp.DataType.Type[type_token.value], 4447 expressions=expressions, 4448 nested=nested, 4449 values=values, 4450 prefix=prefix, 4451 ) 4452 elif expressions: 4453 this.set("expressions", expressions) 4454 4455 # https://materialize.com/docs/sql/types/list/#type-name 4456 while self._match(TokenType.LIST): 4457 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4458 4459 index = self._index 4460 4461 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4462 matched_array = self._match(TokenType.ARRAY) 4463 4464 while self._curr: 4465 matched_l_bracket = self._match(TokenType.L_BRACKET) 4466 if not matched_l_bracket and not matched_array: 4467 break 4468 4469 matched_array = False 4470 values = self._parse_csv(self._parse_assignment) or None 4471 if values and not schema: 4472 self._retreat(index) 4473 break 4474 4475 this = exp.DataType( 4476 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4477 ) 4478 self._match(TokenType.R_BRACKET) 4479 4480 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4481 converter = self.TYPE_CONVERTERS.get(this.this) 4482 if converter: 4483 this = converter(t.cast(exp.DataType, this)) 4484 4485 return this 4486 4487 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4488 index = self._index 4489 4490 if ( 4491 self._curr 4492 and self._next 4493 and self._curr.token_type in self.TYPE_TOKENS 4494 and self._next.token_type in self.TYPE_TOKENS 4495 ): 4496 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4497 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4498 this = self._parse_id_var() 4499 else: 4500 this = ( 4501 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4502 or self._parse_id_var() 4503 ) 4504 4505 self._match(TokenType.COLON) 4506 4507 if ( 4508 type_required 4509 and not isinstance(this, exp.DataType) 4510 and not self._match_set(self.TYPE_TOKENS, advance=False) 4511 ): 4512 self._retreat(index) 4513 return self._parse_types() 4514 4515 return self._parse_column_def(this) 4516 4517 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4518 if not self._match_text_seq("AT", "TIME", "ZONE"): 4519 return this 4520 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4521 4522 def _parse_column(self) -> t.Optional[exp.Expression]: 4523 this = self._parse_column_reference() 4524 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4525 4526 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4527 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4528 4529 return column 4530 4531 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4532 this = self._parse_field() 4533 if ( 4534 not this 4535 and self._match(TokenType.VALUES, advance=False) 4536 and self.VALUES_FOLLOWED_BY_PAREN 4537 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4538 ): 4539 this = self._parse_id_var() 4540 4541 if isinstance(this, exp.Identifier): 4542 # We bubble up comments from the Identifier to the Column 4543 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4544 4545 return this 4546 4547 def _parse_colon_as_json_extract( 4548 self, this: t.Optional[exp.Expression] 4549 ) -> t.Optional[exp.Expression]: 4550 casts = [] 4551 json_path = [] 4552 4553 while self._match(TokenType.COLON): 4554 start_index = self._index 4555 4556 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4557 path = self._parse_column_ops( 4558 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4559 ) 4560 4561 # The cast :: operator has a lower precedence than the extraction operator :, so 4562 # we rearrange the AST appropriately to avoid casting the JSON path 4563 while isinstance(path, exp.Cast): 4564 casts.append(path.to) 4565 path = path.this 4566 4567 if casts: 4568 dcolon_offset = next( 4569 i 4570 for i, t in enumerate(self._tokens[start_index:]) 4571 if t.token_type == TokenType.DCOLON 4572 ) 4573 end_token = self._tokens[start_index + dcolon_offset - 1] 4574 else: 4575 end_token = self._prev 4576 4577 if path: 4578 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4579 4580 if json_path: 4581 this = self.expression( 4582 exp.JSONExtract, 4583 this=this, 4584 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4585 ) 4586 4587 while casts: 4588 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4589 4590 return this 4591 4592 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4593 this = self._parse_bracket(this) 4594 4595 while self._match_set(self.COLUMN_OPERATORS): 4596 op_token = self._prev.token_type 4597 op = self.COLUMN_OPERATORS.get(op_token) 4598 4599 if op_token == TokenType.DCOLON: 4600 field = self._parse_types() 4601 if not field: 4602 self.raise_error("Expected type") 4603 elif op and self._curr: 4604 field = self._parse_column_reference() 4605 
else: 4606 field = self._parse_field(any_token=True, anonymous_func=True) 4607 4608 if isinstance(field, exp.Func) and this: 4609 # bigquery allows function calls like x.y.count(...) 4610 # SAFE.SUBSTR(...) 4611 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4612 this = exp.replace_tree( 4613 this, 4614 lambda n: ( 4615 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4616 if n.table 4617 else n.this 4618 ) 4619 if isinstance(n, exp.Column) 4620 else n, 4621 ) 4622 4623 if op: 4624 this = op(self, this, field) 4625 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4626 this = self.expression( 4627 exp.Column, 4628 this=field, 4629 table=this.this, 4630 db=this.args.get("table"), 4631 catalog=this.args.get("db"), 4632 ) 4633 else: 4634 this = self.expression(exp.Dot, this=this, expression=field) 4635 4636 this = self._parse_bracket(this) 4637 4638 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4639 4640 def _parse_primary(self) -> t.Optional[exp.Expression]: 4641 if self._match_set(self.PRIMARY_PARSERS): 4642 token_type = self._prev.token_type 4643 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4644 4645 if token_type == TokenType.STRING: 4646 expressions = [primary] 4647 while self._match(TokenType.STRING): 4648 expressions.append(exp.Literal.string(self._prev.text)) 4649 4650 if len(expressions) > 1: 4651 return self.expression(exp.Concat, expressions=expressions) 4652 4653 return primary 4654 4655 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4656 return exp.Literal.number(f"0.{self._prev.text}") 4657 4658 if self._match(TokenType.L_PAREN): 4659 comments = self._prev_comments 4660 query = self._parse_select() 4661 4662 if query: 4663 expressions = [query] 4664 else: 4665 expressions = self._parse_expressions() 4666 4667 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4668 4669 if not this and self._match(TokenType.R_PAREN, advance=False): 4670 this = self.expression(exp.Tuple) 4671 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4672 this = self._parse_subquery(this=this, parse_alias=False) 4673 elif isinstance(this, exp.Subquery): 4674 this = self._parse_subquery( 4675 this=self._parse_set_operations(this), parse_alias=False 4676 ) 4677 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4678 this = self.expression(exp.Tuple, expressions=expressions) 4679 else: 4680 this = self.expression(exp.Paren, this=this) 4681 4682 if this: 4683 this.add_comments(comments) 4684 4685 self._match_r_paren(expression=this) 4686 return this 4687 4688 return None 4689 4690 def _parse_field( 4691 self, 4692 any_token: bool = False, 4693 tokens: t.Optional[t.Collection[TokenType]] = None, 4694 anonymous_func: bool = False, 4695 ) -> t.Optional[exp.Expression]: 4696 if anonymous_func: 4697 field = ( 4698 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4699 or self._parse_primary() 4700 ) 4701 else: 4702 field = self._parse_primary() or self._parse_function( 4703 anonymous=anonymous_func, any_token=any_token 4704 ) 4705 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4706 4707 def _parse_function( 4708 self, 4709 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4710 anonymous: bool = False, 4711 optional_parens: bool = True, 4712 any_token: bool = False, 4713 ) -> t.Optional[exp.Expression]: 4714 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support 
this) 4715 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4716 fn_syntax = False 4717 if ( 4718 self._match(TokenType.L_BRACE, advance=False) 4719 and self._next 4720 and self._next.text.upper() == "FN" 4721 ): 4722 self._advance(2) 4723 fn_syntax = True 4724 4725 func = self._parse_function_call( 4726 functions=functions, 4727 anonymous=anonymous, 4728 optional_parens=optional_parens, 4729 any_token=any_token, 4730 ) 4731 4732 if fn_syntax: 4733 self._match(TokenType.R_BRACE) 4734 4735 return func 4736 4737 def _parse_function_call( 4738 self, 4739 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4740 anonymous: bool = False, 4741 optional_parens: bool = True, 4742 any_token: bool = False, 4743 ) -> t.Optional[exp.Expression]: 4744 if not self._curr: 4745 return None 4746 4747 comments = self._curr.comments 4748 token_type = self._curr.token_type 4749 this = self._curr.text 4750 upper = this.upper() 4751 4752 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4753 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4754 self._advance() 4755 return self._parse_window(parser(self)) 4756 4757 if not self._next or self._next.token_type != TokenType.L_PAREN: 4758 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4759 self._advance() 4760 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4761 4762 return None 4763 4764 if any_token: 4765 if token_type in self.RESERVED_TOKENS: 4766 return None 4767 elif token_type not in self.FUNC_TOKENS: 4768 return None 4769 4770 self._advance(2) 4771 4772 parser = self.FUNCTION_PARSERS.get(upper) 4773 if parser and not anonymous: 4774 this = parser(self) 4775 else: 4776 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4777 4778 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4779 this = self.expression(subquery_predicate, this=self._parse_select()) 4780 self._match_r_paren() 4781 return this 4782 4783 if functions is None: 4784 functions = self.FUNCTIONS 4785 4786 function = functions.get(upper) 4787 4788 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4789 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4790 4791 if alias: 4792 args = self._kv_to_prop_eq(args) 4793 4794 if function and not anonymous: 4795 if "dialect" in function.__code__.co_varnames: 4796 func = function(args, dialect=self.dialect) 4797 else: 4798 func = function(args) 4799 4800 func = self.validate_expression(func, args) 4801 if not self.dialect.NORMALIZE_FUNCTIONS: 4802 func.meta["name"] = this 4803 4804 this = func 4805 else: 4806 if token_type == TokenType.IDENTIFIER: 4807 this = exp.Identifier(this=this, quoted=True) 4808 this = self.expression(exp.Anonymous, this=this, expressions=args) 4809 4810 if isinstance(this, exp.Expression): 4811 this.add_comments(comments) 4812 4813 self._match_r_paren(this) 4814 return self._parse_window(this) 4815 4816 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4817 transformed = [] 4818 4819 for e in expressions: 4820 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4821 if isinstance(e, exp.Alias): 4822 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4823 4824 if not isinstance(e, exp.PropertyEQ): 4825 e = self.expression( 4826 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4827 ) 4828 4829 if isinstance(e.this, exp.Column): 4830 e.this.replace(e.this.this) 4831 4832 transformed.append(e) 4833 4834 
return transformed 4835 4836 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4837 return self._parse_column_def(self._parse_id_var()) 4838 4839 def _parse_user_defined_function( 4840 self, kind: t.Optional[TokenType] = None 4841 ) -> t.Optional[exp.Expression]: 4842 this = self._parse_id_var() 4843 4844 while self._match(TokenType.DOT): 4845 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4846 4847 if not self._match(TokenType.L_PAREN): 4848 return this 4849 4850 expressions = self._parse_csv(self._parse_function_parameter) 4851 self._match_r_paren() 4852 return self.expression( 4853 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4854 ) 4855 4856 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4857 literal = self._parse_primary() 4858 if literal: 4859 return self.expression(exp.Introducer, this=token.text, expression=literal) 4860 4861 return self.expression(exp.Identifier, this=token.text) 4862 4863 def _parse_session_parameter(self) -> exp.SessionParameter: 4864 kind = None 4865 this = self._parse_id_var() or self._parse_primary() 4866 4867 if this and self._match(TokenType.DOT): 4868 kind = this.name 4869 this = self._parse_var() or self._parse_primary() 4870 4871 return self.expression(exp.SessionParameter, this=this, kind=kind) 4872 4873 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4874 return self._parse_id_var() 4875 4876 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4877 index = self._index 4878 4879 if self._match(TokenType.L_PAREN): 4880 expressions = t.cast( 4881 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4882 ) 4883 4884 if not self._match(TokenType.R_PAREN): 4885 self._retreat(index) 4886 else: 4887 expressions = [self._parse_lambda_arg()] 4888 4889 if self._match_set(self.LAMBDAS): 4890 return self.LAMBDAS[self._prev.token_type](self, expressions) 4891 4892 self._retreat(index) 4893 4894 this: t.Optional[exp.Expression] 4895 4896 if self._match(TokenType.DISTINCT): 4897 this = self.expression( 4898 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4899 ) 4900 else: 4901 this = self._parse_select_or_expression(alias=alias) 4902 4903 return self._parse_limit( 4904 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4905 ) 4906 4907 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4908 index = self._index 4909 if not self._match(TokenType.L_PAREN): 4910 return this 4911 4912 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4913 # expr can be of both types 4914 if self._match_set(self.SELECT_START_TOKENS): 4915 self._retreat(index) 4916 return this 4917 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4918 self._match_r_paren() 4919 return self.expression(exp.Schema, this=this, expressions=args) 4920 4921 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4922 return self._parse_column_def(self._parse_field(any_token=True)) 4923 4924 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4925 # column defs are not really columns, they're identifiers 4926 if isinstance(this, exp.Column): 4927 this = this.this 4928 4929 kind = self._parse_types(schema=True) 4930 4931 if self._match_text_seq("FOR", "ORDINALITY"): 4932 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4933 4934 constraints: t.List[exp.Expression] = [] 4935 4936 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4937 ("ALIAS", "MATERIALIZED") 4938 ): 4939 persisted = self._prev.text.upper() == "MATERIALIZED" 4940 constraints.append( 4941 self.expression( 4942 exp.ComputedColumnConstraint, 4943 this=self._parse_assignment(), 4944 persisted=persisted or self._match_text_seq("PERSISTED"), 4945 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4946 ) 4947 ) 4948 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4949 self._match(TokenType.ALIAS) 4950 constraints.append( 4951 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4952 ) 4953 4954 while True: 4955 constraint = self._parse_column_constraint() 4956 if not constraint: 4957 break 4958 constraints.append(constraint) 4959 4960 if not kind and not constraints: 4961 return this 4962 4963 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4964 4965 def _parse_auto_increment( 4966 self, 4967 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4968 start = None 4969 increment = None 4970 4971 if self._match(TokenType.L_PAREN, advance=False): 4972 args = self._parse_wrapped_csv(self._parse_bitwise) 4973 start = seq_get(args, 0) 4974 increment = seq_get(args, 1) 4975 elif self._match_text_seq("START"): 4976 start = self._parse_bitwise() 4977 self._match_text_seq("INCREMENT") 4978 increment = self._parse_bitwise() 4979 4980 if start and increment: 4981 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4982 4983 return exp.AutoIncrementColumnConstraint() 4984 4985 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4986 if not self._match_text_seq("REFRESH"): 4987 self._retreat(self._index - 1) 4988 return None 4989 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4990 4991 def _parse_compress(self) -> exp.CompressColumnConstraint: 4992 if self._match(TokenType.L_PAREN, advance=False): 4993 return self.expression( 4994 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4995 ) 4996 4997 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4998 4999 def _parse_generated_as_identity( 5000 self, 5001 ) -> ( 5002 exp.GeneratedAsIdentityColumnConstraint 5003 | exp.ComputedColumnConstraint 5004 | exp.GeneratedAsRowColumnConstraint 5005 ): 5006 if self._match_text_seq("BY", "DEFAULT"): 5007 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5008 this = self.expression( 5009 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5010 ) 5011 else: 5012 self._match_text_seq("ALWAYS") 5013 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5014 5015 self._match(TokenType.ALIAS) 5016 5017 if self._match_text_seq("ROW"): 5018 start = self._match_text_seq("START") 5019 if not start: 5020 self._match(TokenType.END) 5021 hidden = self._match_text_seq("HIDDEN") 5022 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5023 5024 identity = self._match_text_seq("IDENTITY") 5025 5026 if self._match(TokenType.L_PAREN): 5027 if self._match(TokenType.START_WITH): 5028 this.set("start", self._parse_bitwise()) 5029 if self._match_text_seq("INCREMENT", "BY"): 5030 this.set("increment", self._parse_bitwise()) 5031 if self._match_text_seq("MINVALUE"): 5032 this.set("minvalue", self._parse_bitwise()) 5033 if self._match_text_seq("MAXVALUE"): 5034 this.set("maxvalue", self._parse_bitwise()) 5035 5036 if self._match_text_seq("CYCLE"): 5037 this.set("cycle", True) 5038 elif self._match_text_seq("NO", "CYCLE"): 5039 this.set("cycle", False) 5040 5041 if not identity: 5042 this.set("expression", self._parse_range()) 5043 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5044 args = self._parse_csv(self._parse_bitwise) 5045 this.set("start", seq_get(args, 0)) 5046 this.set("increment", seq_get(args, 1)) 5047 5048 self._match_r_paren() 5049 5050 return this 5051 5052 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5053 self._match_text_seq("LENGTH") 5054 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5055 5056 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5057 if self._match_text_seq("NULL"): 5058 return self.expression(exp.NotNullColumnConstraint) 5059 if self._match_text_seq("CASESPECIFIC"): 5060 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5061 if self._match_text_seq("FOR", "REPLICATION"): 5062 return self.expression(exp.NotForReplicationColumnConstraint) 5063 return None 5064 5065 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5066 if self._match(TokenType.CONSTRAINT): 5067 this = self._parse_id_var() 5068 else: 5069 this = None 5070 5071 if self._match_texts(self.CONSTRAINT_PARSERS): 5072 return self.expression( 5073 exp.ColumnConstraint, 5074 this=this, 5075 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5076 ) 5077 5078 return this 5079 5080 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5081 if not self._match(TokenType.CONSTRAINT): 5082 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5083 5084 return self.expression( 5085 exp.Constraint, 5086 this=self._parse_id_var(), 5087 expressions=self._parse_unnamed_constraints(), 5088 ) 5089 5090 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5091 constraints = [] 5092 while True: 5093 constraint = self._parse_unnamed_constraint() or self._parse_function() 5094 if not constraint: 5095 break 5096 constraints.append(constraint) 5097 5098 return constraints 5099 5100 def _parse_unnamed_constraint( 5101 self, constraints: t.Optional[t.Collection[str]] = None 5102 ) -> t.Optional[exp.Expression]: 5103 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5104 constraints or self.CONSTRAINT_PARSERS 5105 ): 5106 return None 5107 5108 constraint = self._prev.text.upper() 5109 if constraint not in self.CONSTRAINT_PARSERS: 5110 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5111 5112 return self.CONSTRAINT_PARSERS[constraint](self) 5113 5114 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5115 self._match_text_seq("KEY") 5116 return self.expression( 5117 exp.UniqueColumnConstraint, 5118 this=self._parse_schema(self._parse_id_var(any_token=False)), 5119 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5120 on_conflict=self._parse_on_conflict(), 5121 ) 5122 5123 def _parse_key_constraint_options(self) -> t.List[str]: 5124 options = [] 5125 while True: 5126 if not self._curr: 5127 break 5128 5129 if self._match(TokenType.ON): 5130 action = None 5131 on = self._advance_any() and self._prev.text 5132 5133 if self._match_text_seq("NO", "ACTION"): 5134 action = "NO ACTION" 5135 elif self._match_text_seq("CASCADE"): 5136 action = "CASCADE" 5137 elif self._match_text_seq("RESTRICT"): 5138 action = "RESTRICT" 5139 elif self._match_pair(TokenType.SET, TokenType.NULL): 5140 action = "SET NULL" 5141 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5142 action = "SET DEFAULT" 5143 else: 5144 self.raise_error("Invalid key constraint") 5145 5146 options.append(f"ON {on} {action}") 5147 elif self._match_text_seq("NOT", "ENFORCED"): 5148 options.append("NOT ENFORCED") 5149 elif self._match_text_seq("DEFERRABLE"): 5150 options.append("DEFERRABLE") 5151 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5152 options.append("INITIALLY DEFERRED") 5153 elif self._match_text_seq("NORELY"): 5154 options.append("NORELY") 5155 elif self._match_text_seq("MATCH", "FULL"): 5156 options.append("MATCH FULL") 5157 else: 5158 break 5159 5160 return options 5161 5162 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5163 if match and not self._match(TokenType.REFERENCES): 5164 return None 5165 5166 expressions = None 5167 this = self._parse_table(schema=True) 5168 options = self._parse_key_constraint_options() 5169 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5170 5171 def _parse_foreign_key(self) -> exp.ForeignKey: 5172 expressions = self._parse_wrapped_id_vars() 5173 reference = self._parse_references() 5174 options = {} 5175 5176 while self._match(TokenType.ON): 5177 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5178 self.raise_error("Expected DELETE or UPDATE") 5179 5180 kind = self._prev.text.lower() 5181 5182 if self._match_text_seq("NO", "ACTION"): 5183 action = "NO ACTION" 5184 elif self._match(TokenType.SET): 5185 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5186 action = "SET " + self._prev.text.upper() 5187 else: 5188 self._advance() 5189 action = self._prev.text.upper() 5190 5191 options[kind] = action 5192 5193 return self.expression( 5194 exp.ForeignKey, 5195 expressions=expressions, 5196 reference=reference, 5197 **options, # type: ignore 5198 ) 5199 5200 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5201 return self._parse_field() 5202 5203 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5204 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5205 self._retreat(self._index - 1) 5206 return None 5207 5208 id_vars = self._parse_wrapped_id_vars() 5209 return self.expression( 5210 exp.PeriodForSystemTimeConstraint, 5211 this=seq_get(id_vars, 0), 5212 expression=seq_get(id_vars, 1), 5213 ) 5214 5215 def _parse_primary_key( 5216 self, wrapped_optional: bool = False, in_props: bool = False 5217 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5218 desc = ( 5219 self._match_set((TokenType.ASC, TokenType.DESC)) 5220 and self._prev.token_type == TokenType.DESC 5221 ) 5222 5223 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5224 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5225 5226 expressions = self._parse_wrapped_csv( 5227 self._parse_primary_key_part, optional=wrapped_optional 5228 ) 5229 options = self._parse_key_constraint_options() 5230 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5231 5232 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5233 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5234 5235 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5236 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5237 return this 5238 5239 bracket_kind = self._prev.token_type 5240 expressions = self._parse_csv( 5241 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5242 ) 5243 5244 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5245 self.raise_error("Expected ]") 5246 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5247 self.raise_error("Expected }") 5248 5249 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5250 if bracket_kind == TokenType.L_BRACE: 5251 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5252 elif not this: 5253 this = self.expression(exp.Array, expressions=expressions) 5254 else: 5255 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5256 if constructor_type: 5257 return self.expression(constructor_type, expressions=expressions) 5258 5259 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5260 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5261 5262 self._add_comments(this) 5263 return self._parse_bracket(this) 5264 5265 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5266 if self._match(TokenType.COLON): 5267 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5268 return this 5269 5270 def _parse_case(self) -> t.Optional[exp.Expression]: 5271 ifs = [] 5272 default = None 5273 5274 comments = self._prev_comments 5275 expression = self._parse_assignment() 5276 5277 while self._match(TokenType.WHEN): 5278 this = self._parse_assignment() 5279 self._match(TokenType.THEN) 5280 then = self._parse_assignment() 5281 ifs.append(self.expression(exp.If, this=this, true=then)) 5282 5283 if self._match(TokenType.ELSE): 5284 default = self._parse_assignment() 5285 5286 if not self._match(TokenType.END): 5287 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5288 default = exp.column("interval") 5289 else: 5290 self.raise_error("Expected END after CASE", self._prev) 5291 5292 return self.expression( 5293 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5294 ) 5295 5296 def _parse_if(self) -> t.Optional[exp.Expression]: 5297 if self._match(TokenType.L_PAREN): 5298 args = self._parse_csv(self._parse_assignment) 5299 this = self.validate_expression(exp.If.from_arg_list(args), args) 5300 self._match_r_paren() 5301 else: 5302 index = self._index - 1 5303 5304 if self.NO_PAREN_IF_COMMANDS and index == 0: 5305 return 
self._parse_as_command(self._prev) 5306 5307 condition = self._parse_assignment() 5308 5309 if not condition: 5310 self._retreat(index) 5311 return None 5312 5313 self._match(TokenType.THEN) 5314 true = self._parse_assignment() 5315 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5316 self._match(TokenType.END) 5317 this = self.expression(exp.If, this=condition, true=true, false=false) 5318 5319 return this 5320 5321 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5322 if not self._match_text_seq("VALUE", "FOR"): 5323 self._retreat(self._index - 1) 5324 return None 5325 5326 return self.expression( 5327 exp.NextValueFor, 5328 this=self._parse_column(), 5329 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5330 ) 5331 5332 def _parse_extract(self) -> exp.Extract: 5333 this = self._parse_function() or self._parse_var() or self._parse_type() 5334 5335 if self._match(TokenType.FROM): 5336 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5337 5338 if not self._match(TokenType.COMMA): 5339 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5340 5341 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5342 5343 def _parse_gap_fill(self) -> exp.GapFill: 5344 self._match(TokenType.TABLE) 5345 this = self._parse_table() 5346 5347 self._match(TokenType.COMMA) 5348 args = [this, *self._parse_csv(self._parse_lambda)] 5349 5350 gap_fill = exp.GapFill.from_arg_list(args) 5351 return self.validate_expression(gap_fill, args) 5352 5353 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5354 this = self._parse_assignment() 5355 5356 if not self._match(TokenType.ALIAS): 5357 if self._match(TokenType.COMMA): 5358 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5359 5360 self.raise_error("Expected AS after CAST") 5361 5362 fmt = None 5363 to = self._parse_types() 5364 5365 if self._match(TokenType.FORMAT): 5366 fmt_string = self._parse_string() 5367 fmt = self._parse_at_time_zone(fmt_string) 5368 5369 if not to: 5370 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5371 if to.this in exp.DataType.TEMPORAL_TYPES: 5372 this = self.expression( 5373 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5374 this=this, 5375 format=exp.Literal.string( 5376 format_time( 5377 fmt_string.this if fmt_string else "", 5378 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5379 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5380 ) 5381 ), 5382 ) 5383 5384 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5385 this.set("zone", fmt.args["zone"]) 5386 return this 5387 elif not to: 5388 self.raise_error("Expected TYPE after CAST") 5389 elif isinstance(to, exp.Identifier): 5390 to = exp.DataType.build(to.name, udt=True) 5391 elif to.this == exp.DataType.Type.CHAR: 5392 if self._match(TokenType.CHARACTER_SET): 5393 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5394 5395 return self.expression( 5396 exp.Cast if strict else exp.TryCast, 5397 this=this, 5398 to=to, 5399 format=fmt, 5400 safe=safe, 5401 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5402 ) 5403 5404 def _parse_string_agg(self) -> exp.Expression: 5405 if self._match(TokenType.DISTINCT): 5406 args: t.List[t.Optional[exp.Expression]] = [ 5407 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5408 ] 5409 if 
self._match(TokenType.COMMA): 5410 args.extend(self._parse_csv(self._parse_assignment)) 5411 else: 5412 args = self._parse_csv(self._parse_assignment) # type: ignore 5413 5414 index = self._index 5415 if not self._match(TokenType.R_PAREN) and args: 5416 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5417 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5418 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5419 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5420 5421 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5422 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5423 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5424 if not self._match_text_seq("WITHIN", "GROUP"): 5425 self._retreat(index) 5426 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5427 5428 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5429 order = self._parse_order(this=seq_get(args, 0)) 5430 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5431 5432 def _parse_convert( 5433 self, strict: bool, safe: t.Optional[bool] = None 5434 ) -> t.Optional[exp.Expression]: 5435 this = self._parse_bitwise() 5436 5437 if self._match(TokenType.USING): 5438 to: t.Optional[exp.Expression] = self.expression( 5439 exp.CharacterSet, this=self._parse_var() 5440 ) 5441 elif self._match(TokenType.COMMA): 5442 to = self._parse_types() 5443 else: 5444 to = None 5445 5446 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5447 5448 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5449 """ 5450 There are generally two variants of the DECODE function: 5451 5452 - DECODE(bin, charset) 5453 - DECODE(expression, search, result [, search, result] ... [, default]) 5454 5455 The second variant will always be parsed into a CASE expression. Note that NULL 5456 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5457 instead of relying on pattern matching. 
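        For example (a sketch; the rewrite is derived directly from the logic
        below and is dialect-independent),

            DECODE(x, 1, 'one', NULL, 'missing', 'other')

        is parsed as the equivalent of

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'missing'
                ELSE 'other'
            END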
5458 """ 5459 args = self._parse_csv(self._parse_assignment) 5460 5461 if len(args) < 3: 5462 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5463 5464 expression, *expressions = args 5465 if not expression: 5466 return None 5467 5468 ifs = [] 5469 for search, result in zip(expressions[::2], expressions[1::2]): 5470 if not search or not result: 5471 return None 5472 5473 if isinstance(search, exp.Literal): 5474 ifs.append( 5475 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5476 ) 5477 elif isinstance(search, exp.Null): 5478 ifs.append( 5479 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5480 ) 5481 else: 5482 cond = exp.or_( 5483 exp.EQ(this=expression.copy(), expression=search), 5484 exp.and_( 5485 exp.Is(this=expression.copy(), expression=exp.Null()), 5486 exp.Is(this=search.copy(), expression=exp.Null()), 5487 copy=False, 5488 ), 5489 copy=False, 5490 ) 5491 ifs.append(exp.If(this=cond, true=result)) 5492 5493 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5494 5495 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5496 self._match_text_seq("KEY") 5497 key = self._parse_column() 5498 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5499 self._match_text_seq("VALUE") 5500 value = self._parse_bitwise() 5501 5502 if not key and not value: 5503 return None 5504 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5505 5506 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5507 if not this or not self._match_text_seq("FORMAT", "JSON"): 5508 return this 5509 5510 return self.expression(exp.FormatJson, this=this) 5511 5512 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5513 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5514 for value in values: 5515 if self._match_text_seq(value, "ON", on): 5516 return f"{value} ON {on}" 5517 5518 return None 5519 5520 @t.overload 5521 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5522 5523 @t.overload 5524 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5525 5526 def _parse_json_object(self, agg=False): 5527 star = self._parse_star() 5528 expressions = ( 5529 [star] 5530 if star 5531 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5532 ) 5533 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5534 5535 unique_keys = None 5536 if self._match_text_seq("WITH", "UNIQUE"): 5537 unique_keys = True 5538 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5539 unique_keys = False 5540 5541 self._match_text_seq("KEYS") 5542 5543 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5544 self._parse_type() 5545 ) 5546 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5547 5548 return self.expression( 5549 exp.JSONObjectAgg if agg else exp.JSONObject, 5550 expressions=expressions, 5551 null_handling=null_handling, 5552 unique_keys=unique_keys, 5553 return_type=return_type, 5554 encoding=encoding, 5555 ) 5556 5557 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5558 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5559 if not self._match_text_seq("NESTED"): 5560 this = self._parse_id_var() 5561 kind = self._parse_types(allow_identifiers=False) 5562 nested = None 5563 else: 5564 this = None 5565 kind = None 5566 nested = True 5567 5568 path = self._match_text_seq("PATH") and self._parse_string() 5569 nested_schema = nested and self._parse_json_schema() 5570 5571 return self.expression( 5572 exp.JSONColumnDef, 5573 this=this, 5574 kind=kind, 5575 path=path, 5576 nested_schema=nested_schema, 5577 ) 5578 5579 def _parse_json_schema(self) -> exp.JSONSchema: 5580 self._match_text_seq("COLUMNS") 5581 return self.expression( 5582 exp.JSONSchema, 5583 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5584 ) 5585 5586 def _parse_json_table(self) -> exp.JSONTable: 5587 this = self._parse_format_json(self._parse_bitwise()) 5588 path = self._match(TokenType.COMMA) and self._parse_string() 5589 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5590 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5591 schema = self._parse_json_schema() 5592 5593 return exp.JSONTable( 5594 this=this, 5595 schema=schema, 5596 path=path, 5597 error_handling=error_handling, 5598 empty_handling=empty_handling, 5599 ) 5600 5601 def _parse_match_against(self) -> exp.MatchAgainst: 5602 expressions = self._parse_csv(self._parse_column) 5603 5604 self._match_text_seq(")", "AGAINST", "(") 5605 5606 this = self._parse_string() 5607 5608 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5609 modifier = "IN NATURAL LANGUAGE MODE" 5610 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5611 modifier = f"{modifier} WITH QUERY EXPANSION" 5612 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5613 modifier = "IN BOOLEAN MODE" 5614 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5615 modifier = "WITH QUERY EXPANSION" 5616 else: 5617 modifier = None 5618 5619 return self.expression( 5620 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5621 ) 5622 5623 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5624 def _parse_open_json(self) -> exp.OpenJSON: 5625 this = self._parse_bitwise() 5626 path = self._match(TokenType.COMMA) and self._parse_string() 5627 5628 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5629 this = self._parse_field(any_token=True) 5630 kind = self._parse_types() 5631 path = 
self._parse_string() 5632 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5633 5634 return self.expression( 5635 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5636 ) 5637 5638 expressions = None 5639 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5640 self._match_l_paren() 5641 expressions = self._parse_csv(_parse_open_json_column_def) 5642 5643 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5644 5645 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5646 args = self._parse_csv(self._parse_bitwise) 5647 5648 if self._match(TokenType.IN): 5649 return self.expression( 5650 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5651 ) 5652 5653 if haystack_first: 5654 haystack = seq_get(args, 0) 5655 needle = seq_get(args, 1) 5656 else: 5657 needle = seq_get(args, 0) 5658 haystack = seq_get(args, 1) 5659 5660 return self.expression( 5661 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5662 ) 5663 5664 def _parse_predict(self) -> exp.Predict: 5665 self._match_text_seq("MODEL") 5666 this = self._parse_table() 5667 5668 self._match(TokenType.COMMA) 5669 self._match_text_seq("TABLE") 5670 5671 return self.expression( 5672 exp.Predict, 5673 this=this, 5674 expression=self._parse_table(), 5675 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5676 ) 5677 5678 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5679 args = self._parse_csv(self._parse_table) 5680 return exp.JoinHint(this=func_name.upper(), expressions=args) 5681 5682 def _parse_substring(self) -> exp.Substring: 5683 # Postgres supports the form: substring(string [from int] [for int]) 5684 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5685 5686 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5687 5688 if self._match(TokenType.FROM): 5689 args.append(self._parse_bitwise()) 5690 if self._match(TokenType.FOR): 5691 if len(args) == 1: 5692 args.append(exp.Literal.number(1)) 5693 args.append(self._parse_bitwise()) 5694 5695 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5696 5697 def _parse_trim(self) -> exp.Trim: 5698 # https://www.w3resource.com/sql/character-functions/trim.php 5699 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5700 5701 position = None 5702 collation = None 5703 expression = None 5704 5705 if self._match_texts(self.TRIM_TYPES): 5706 position = self._prev.text.upper() 5707 5708 this = self._parse_bitwise() 5709 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5710 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5711 expression = self._parse_bitwise() 5712 5713 if invert_order: 5714 this, expression = expression, this 5715 5716 if self._match(TokenType.COLLATE): 5717 collation = self._parse_bitwise() 5718 5719 return self.expression( 5720 exp.Trim, this=this, position=position, expression=expression, collation=collation 5721 ) 5722 5723 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5724 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5725 5726 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5727 return self._parse_window(self._parse_id_var(), alias=True) 5728 5729 def _parse_respect_or_ignore_nulls( 5730 self, this: t.Optional[exp.Expression] 5731 ) -> t.Optional[exp.Expression]: 5732 if self._match_text_seq("IGNORE", "NULLS"): 
5733 return self.expression(exp.IgnoreNulls, this=this) 5734 if self._match_text_seq("RESPECT", "NULLS"): 5735 return self.expression(exp.RespectNulls, this=this) 5736 return this 5737 5738 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5739 if self._match(TokenType.HAVING): 5740 self._match_texts(("MAX", "MIN")) 5741 max = self._prev.text.upper() != "MIN" 5742 return self.expression( 5743 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5744 ) 5745 5746 return this 5747 5748 def _parse_window( 5749 self, this: t.Optional[exp.Expression], alias: bool = False 5750 ) -> t.Optional[exp.Expression]: 5751 func = this 5752 comments = func.comments if isinstance(func, exp.Expression) else None 5753 5754 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5755 self._match(TokenType.WHERE) 5756 this = self.expression( 5757 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5758 ) 5759 self._match_r_paren() 5760 5761 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5762 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5763 if self._match_text_seq("WITHIN", "GROUP"): 5764 order = self._parse_wrapped(self._parse_order) 5765 this = self.expression(exp.WithinGroup, this=this, expression=order) 5766 5767 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5768 # Some dialects choose to implement and some do not. 5769 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5770 5771 # There is some code above in _parse_lambda that handles 5772 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5773 5774 # The below changes handle 5775 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5776 5777 # Oracle allows both formats 5778 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5779 # and Snowflake chose to do the same for familiarity 5780 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5781 if isinstance(this, exp.AggFunc): 5782 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5783 5784 if ignore_respect and ignore_respect is not this: 5785 ignore_respect.replace(ignore_respect.this) 5786 this = self.expression(ignore_respect.__class__, this=this) 5787 5788 this = self._parse_respect_or_ignore_nulls(this) 5789 5790 # bigquery select from window x AS (partition by ...) 
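        # e.g. (a sketch): for SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y),
        # _parse_named_window re-enters this method with alias=True to parse the
        # `w AS (...)` part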
5791 if alias: 5792 over = None 5793 self._match(TokenType.ALIAS) 5794 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5795 return this 5796 else: 5797 over = self._prev.text.upper() 5798 5799 if comments and isinstance(func, exp.Expression): 5800 func.pop_comments() 5801 5802 if not self._match(TokenType.L_PAREN): 5803 return self.expression( 5804 exp.Window, 5805 comments=comments, 5806 this=this, 5807 alias=self._parse_id_var(False), 5808 over=over, 5809 ) 5810 5811 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5812 5813 first = self._match(TokenType.FIRST) 5814 if self._match_text_seq("LAST"): 5815 first = False 5816 5817 partition, order = self._parse_partition_and_order() 5818 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5819 5820 if kind: 5821 self._match(TokenType.BETWEEN) 5822 start = self._parse_window_spec() 5823 self._match(TokenType.AND) 5824 end = self._parse_window_spec() 5825 5826 spec = self.expression( 5827 exp.WindowSpec, 5828 kind=kind, 5829 start=start["value"], 5830 start_side=start["side"], 5831 end=end["value"], 5832 end_side=end["side"], 5833 ) 5834 else: 5835 spec = None 5836 5837 self._match_r_paren() 5838 5839 window = self.expression( 5840 exp.Window, 5841 comments=comments, 5842 this=this, 5843 partition_by=partition, 5844 order=order, 5845 spec=spec, 5846 alias=window_alias, 5847 over=over, 5848 first=first, 5849 ) 5850 5851 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5852 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5853 return self._parse_window(window, alias=alias) 5854 5855 return window 5856 5857 def _parse_partition_and_order( 5858 self, 5859 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5860 return self._parse_partition_by(), self._parse_order() 5861 5862 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5863 self._match(TokenType.BETWEEN) 5864 5865 return { 5866 "value": ( 5867 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5868 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5869 or self._parse_bitwise() 5870 ), 5871 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5872 } 5873 5874 def _parse_alias( 5875 self, this: t.Optional[exp.Expression], explicit: bool = False 5876 ) -> t.Optional[exp.Expression]: 5877 any_token = self._match(TokenType.ALIAS) 5878 comments = self._prev_comments or [] 5879 5880 if explicit and not any_token: 5881 return this 5882 5883 if self._match(TokenType.L_PAREN): 5884 aliases = self.expression( 5885 exp.Aliases, 5886 comments=comments, 5887 this=this, 5888 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5889 ) 5890 self._match_r_paren(aliases) 5891 return aliases 5892 5893 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5894 self.STRING_ALIASES and self._parse_string_as_identifier() 5895 ) 5896 5897 if alias: 5898 comments.extend(alias.pop_comments()) 5899 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5900 column = this.this 5901 5902 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5903 if not this.comments and column and column.comments: 5904 this.comments = column.pop_comments() 5905 5906 return this 5907 5908 def _parse_id_var( 5909 self, 5910 any_token: bool = True, 5911 tokens: t.Optional[t.Collection[TokenType]] = None, 5912 ) -> t.Optional[exp.Expression]: 5913 expression = self._parse_identifier() 5914 if 
not expression and ( 5915 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5916 ): 5917 quoted = self._prev.token_type == TokenType.STRING 5918 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5919 5920 return expression 5921 5922 def _parse_string(self) -> t.Optional[exp.Expression]: 5923 if self._match_set(self.STRING_PARSERS): 5924 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5925 return self._parse_placeholder() 5926 5927 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5928 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5929 5930 def _parse_number(self) -> t.Optional[exp.Expression]: 5931 if self._match_set(self.NUMERIC_PARSERS): 5932 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5933 return self._parse_placeholder() 5934 5935 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5936 if self._match(TokenType.IDENTIFIER): 5937 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5938 return self._parse_placeholder() 5939 5940 def _parse_var( 5941 self, 5942 any_token: bool = False, 5943 tokens: t.Optional[t.Collection[TokenType]] = None, 5944 upper: bool = False, 5945 ) -> t.Optional[exp.Expression]: 5946 if ( 5947 (any_token and self._advance_any()) 5948 or self._match(TokenType.VAR) 5949 or (self._match_set(tokens) if tokens else False) 5950 ): 5951 return self.expression( 5952 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5953 ) 5954 return self._parse_placeholder() 5955 5956 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5957 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5958 self._advance() 5959 return self._prev 5960 return None 5961 5962 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5963 return self._parse_var() or self._parse_string() 5964 5965 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5966 return self._parse_primary() or self._parse_var(any_token=True) 5967 5968 def _parse_null(self) -> t.Optional[exp.Expression]: 5969 if self._match_set(self.NULL_TOKENS): 5970 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5971 return self._parse_placeholder() 5972 5973 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5974 if self._match(TokenType.TRUE): 5975 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5976 if self._match(TokenType.FALSE): 5977 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5978 return self._parse_placeholder() 5979 5980 def _parse_star(self) -> t.Optional[exp.Expression]: 5981 if self._match(TokenType.STAR): 5982 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5983 return self._parse_placeholder() 5984 5985 def _parse_parameter(self) -> exp.Parameter: 5986 this = self._parse_identifier() or self._parse_primary_or_var() 5987 return self.expression(exp.Parameter, this=this) 5988 5989 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5990 if self._match_set(self.PLACEHOLDER_PARSERS): 5991 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5992 if placeholder: 5993 return placeholder 5994 self._advance(-1) 5995 return None 5996 5997 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5998 if not self._match_texts(keywords): 5999 return None 6000 if self._match(TokenType.L_PAREN, advance=False): 6001 return 
self._parse_wrapped_csv(self._parse_expression) 6002 6003 expression = self._parse_expression() 6004 return [expression] if expression else None 6005 6006 def _parse_csv( 6007 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6008 ) -> t.List[exp.Expression]: 6009 parse_result = parse_method() 6010 items = [parse_result] if parse_result is not None else [] 6011 6012 while self._match(sep): 6013 self._add_comments(parse_result) 6014 parse_result = parse_method() 6015 if parse_result is not None: 6016 items.append(parse_result) 6017 6018 return items 6019 6020 def _parse_tokens( 6021 self, parse_method: t.Callable, expressions: t.Dict 6022 ) -> t.Optional[exp.Expression]: 6023 this = parse_method() 6024 6025 while self._match_set(expressions): 6026 this = self.expression( 6027 expressions[self._prev.token_type], 6028 this=this, 6029 comments=self._prev_comments, 6030 expression=parse_method(), 6031 ) 6032 6033 return this 6034 6035 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6036 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6037 6038 def _parse_wrapped_csv( 6039 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6040 ) -> t.List[exp.Expression]: 6041 return self._parse_wrapped( 6042 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6043 ) 6044 6045 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6046 wrapped = self._match(TokenType.L_PAREN) 6047 if not wrapped and not optional: 6048 self.raise_error("Expecting (") 6049 parse_result = parse_method() 6050 if wrapped: 6051 self._match_r_paren() 6052 return parse_result 6053 6054 def _parse_expressions(self) -> t.List[exp.Expression]: 6055 return self._parse_csv(self._parse_expression) 6056 6057 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6058 return self._parse_select() or self._parse_set_operations( 6059 self._parse_expression() if alias else self._parse_assignment() 6060 ) 6061 6062 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6063 return self._parse_query_modifiers( 6064 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6065 ) 6066 6067 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6068 this = None 6069 if self._match_texts(self.TRANSACTION_KIND): 6070 this = self._prev.text 6071 6072 self._match_texts(("TRANSACTION", "WORK")) 6073 6074 modes = [] 6075 while True: 6076 mode = [] 6077 while self._match(TokenType.VAR): 6078 mode.append(self._prev.text) 6079 6080 if mode: 6081 modes.append(" ".join(mode)) 6082 if not self._match(TokenType.COMMA): 6083 break 6084 6085 return self.expression(exp.Transaction, this=this, modes=modes) 6086 6087 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6088 chain = None 6089 savepoint = None 6090 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6091 6092 self._match_texts(("TRANSACTION", "WORK")) 6093 6094 if self._match_text_seq("TO"): 6095 self._match_text_seq("SAVEPOINT") 6096 savepoint = self._parse_id_var() 6097 6098 if self._match(TokenType.AND): 6099 chain = not self._match_text_seq("NO") 6100 self._match_text_seq("CHAIN") 6101 6102 if is_rollback: 6103 return self.expression(exp.Rollback, savepoint=savepoint) 6104 6105 return self.expression(exp.Commit, chain=chain) 6106 6107 def _parse_refresh(self) -> exp.Refresh: 6108 self._match(TokenType.TABLE) 6109 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6110 6111 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6112 if not self._match_text_seq("ADD"): 6113 return None 6114 6115 self._match(TokenType.COLUMN) 6116 exists_column = self._parse_exists(not_=True) 6117 expression = self._parse_field_def() 6118 6119 if expression: 6120 expression.set("exists", exists_column) 6121 6122 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6123 if self._match_texts(("FIRST", "AFTER")): 6124 position = self._prev.text 6125 column_position = self.expression( 6126 exp.ColumnPosition, this=self._parse_column(), position=position 6127 ) 6128 expression.set("position", column_position) 6129 6130 return expression 6131 6132 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6133 drop = self._match(TokenType.DROP) and self._parse_drop() 6134 if drop and not isinstance(drop, exp.Command): 6135 drop.set("kind", drop.args.get("kind", "COLUMN")) 6136 return drop 6137 6138 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6139 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6140 return self.expression( 6141 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6142 ) 6143 6144 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6145 index = self._index - 1 6146 6147 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6148 return self._parse_csv( 6149 lambda: self.expression( 6150 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6151 ) 6152 ) 6153 6154 self._retreat(index) 6155 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6156 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6157 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6158 6159 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6160 if self._match_texts(self.ALTER_ALTER_PARSERS): 6161 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6162 6163 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6164 # keyword after ALTER we default to parsing this statement 6165 self._match(TokenType.COLUMN) 6166 column = self._parse_field(any_token=True) 6167 6168 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6169 return self.expression(exp.AlterColumn, this=column, drop=True) 6170 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6171 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6172 if self._match(TokenType.COMMENT): 6173 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6174 if self._match_text_seq("DROP", "NOT", "NULL"): 6175 return self.expression( 6176 exp.AlterColumn, 6177 this=column, 6178 drop=True, 6179 allow_null=True, 6180 ) 6181 if self._match_text_seq("SET", "NOT", "NULL"): 6182 return self.expression( 6183 exp.AlterColumn, 6184 this=column, 6185 allow_null=False, 6186 ) 6187 self._match_text_seq("SET", "DATA") 6188 self._match_text_seq("TYPE") 6189 return self.expression( 6190 exp.AlterColumn, 6191 this=column, 6192 dtype=self._parse_types(), 6193 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6194 using=self._match(TokenType.USING) and self._parse_assignment(), 6195 ) 6196 6197 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6198 if self._match_texts(("ALL", "EVEN", "AUTO")): 6199 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6200 6201 self._match_text_seq("KEY", "DISTKEY") 6202 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6203 6204 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6205 if compound: 6206 self._match_text_seq("SORTKEY") 6207 6208 if self._match(TokenType.L_PAREN, advance=False): 6209 return self.expression( 6210 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6211 ) 6212 6213 self._match_texts(("AUTO", "NONE")) 6214 return self.expression( 6215 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6216 ) 6217 6218 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6219 index = self._index - 1 6220 6221 partition_exists = self._parse_exists() 6222 if self._match(TokenType.PARTITION, advance=False): 6223 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6224 6225 self._retreat(index) 6226 return self._parse_csv(self._parse_drop_column) 6227 6228 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6229 if self._match(TokenType.COLUMN): 6230 exists = self._parse_exists() 6231 old_column = self._parse_column() 6232 to = self._match_text_seq("TO") 6233 new_column = self._parse_column() 6234 6235 if old_column is None or to is None or new_column is None: 6236 return None 6237 6238 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6239 6240 self._match_text_seq("TO") 6241 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6242 6243 def _parse_alter_table_set(self) -> exp.AlterSet: 6244 alter_set = self.expression(exp.AlterSet) 6245 6246 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6247 "TABLE", "PROPERTIES" 6248 ): 6249 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6250 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6251 alter_set.set("expressions", [self._parse_assignment()]) 6252 elif self._match_texts(("LOGGED", "UNLOGGED")): 6253 alter_set.set("option", exp.var(self._prev.text.upper())) 6254 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6255 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6256 elif self._match_text_seq("LOCATION"): 6257 alter_set.set("location", self._parse_field()) 6258 elif self._match_text_seq("ACCESS", "METHOD"): 6259 alter_set.set("access_method", self._parse_field()) 6260 elif self._match_text_seq("TABLESPACE"): 6261 alter_set.set("tablespace", self._parse_field()) 6262 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6263 alter_set.set("file_format", [self._parse_field()]) 6264 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6265 alter_set.set("file_format", self._parse_wrapped_options()) 6266 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6267 alter_set.set("copy_options", self._parse_wrapped_options()) 6268 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6269 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6270 else: 6271 if self._match_text_seq("SERDE"): 6272 alter_set.set("serde", self._parse_field()) 6273 6274 alter_set.set("expressions", [self._parse_properties()]) 6275 6276 return alter_set 6277 6278 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6279 start = self._prev 6280 6281 if not self._match(TokenType.TABLE): 6282 return 
self._parse_as_command(start) 6283 6284 exists = self._parse_exists() 6285 only = self._match_text_seq("ONLY") 6286 this = self._parse_table(schema=True) 6287 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6288 6289 if self._next: 6290 self._advance() 6291 6292 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6293 if parser: 6294 actions = ensure_list(parser(self)) 6295 options = self._parse_csv(self._parse_property) 6296 6297 if not self._curr and actions: 6298 return self.expression( 6299 exp.AlterTable, 6300 this=this, 6301 exists=exists, 6302 actions=actions, 6303 only=only, 6304 options=options, 6305 cluster=cluster, 6306 ) 6307 6308 return self._parse_as_command(start) 6309 6310 def _parse_merge(self) -> exp.Merge: 6311 self._match(TokenType.INTO) 6312 target = self._parse_table() 6313 6314 if target and self._match(TokenType.ALIAS, advance=False): 6315 target.set("alias", self._parse_table_alias()) 6316 6317 self._match(TokenType.USING) 6318 using = self._parse_table() 6319 6320 self._match(TokenType.ON) 6321 on = self._parse_assignment() 6322 6323 return self.expression( 6324 exp.Merge, 6325 this=target, 6326 using=using, 6327 on=on, 6328 expressions=self._parse_when_matched(), 6329 ) 6330 6331 def _parse_when_matched(self) -> t.List[exp.When]: 6332 whens = [] 6333 6334 while self._match(TokenType.WHEN): 6335 matched = not self._match(TokenType.NOT) 6336 self._match_text_seq("MATCHED") 6337 source = ( 6338 False 6339 if self._match_text_seq("BY", "TARGET") 6340 else self._match_text_seq("BY", "SOURCE") 6341 ) 6342 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6343 6344 self._match(TokenType.THEN) 6345 6346 if self._match(TokenType.INSERT): 6347 _this = self._parse_star() 6348 if _this: 6349 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6350 else: 6351 then = self.expression( 6352 exp.Insert, 6353 this=self._parse_value(), 6354 expression=self._match_text_seq("VALUES") and self._parse_value(), 6355 ) 6356 elif self._match(TokenType.UPDATE): 6357 expressions = self._parse_star() 6358 if expressions: 6359 then = self.expression(exp.Update, expressions=expressions) 6360 else: 6361 then = self.expression( 6362 exp.Update, 6363 expressions=self._match(TokenType.SET) 6364 and self._parse_csv(self._parse_equality), 6365 ) 6366 elif self._match(TokenType.DELETE): 6367 then = self.expression(exp.Var, this=self._prev.text) 6368 else: 6369 then = None 6370 6371 whens.append( 6372 self.expression( 6373 exp.When, 6374 matched=matched, 6375 source=source, 6376 condition=condition, 6377 then=then, 6378 ) 6379 ) 6380 return whens 6381 6382 def _parse_show(self) -> t.Optional[exp.Expression]: 6383 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6384 if parser: 6385 return parser(self) 6386 return self._parse_as_command(self._prev) 6387 6388 def _parse_set_item_assignment( 6389 self, kind: t.Optional[str] = None 6390 ) -> t.Optional[exp.Expression]: 6391 index = self._index 6392 6393 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6394 return self._parse_set_transaction(global_=kind == "GLOBAL") 6395 6396 left = self._parse_primary() or self._parse_column() 6397 assignment_delimiter = self._match_texts(("=", "TO")) 6398 6399 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6400 self._retreat(index) 6401 return None 6402 6403 right = self._parse_statement() or self._parse_id_var() 6404 if isinstance(right, 
(exp.Column, exp.Identifier)): 6405 right = exp.var(right.name) 6406 6407 this = self.expression(exp.EQ, this=left, expression=right) 6408 return self.expression(exp.SetItem, this=this, kind=kind) 6409 6410 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6411 self._match_text_seq("TRANSACTION") 6412 characteristics = self._parse_csv( 6413 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6414 ) 6415 return self.expression( 6416 exp.SetItem, 6417 expressions=characteristics, 6418 kind="TRANSACTION", 6419 **{"global": global_}, # type: ignore 6420 ) 6421 6422 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6423 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6424 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6425 6426 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6427 index = self._index 6428 set_ = self.expression( 6429 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6430 ) 6431 6432 if self._curr: 6433 self._retreat(index) 6434 return self._parse_as_command(self._prev) 6435 6436 return set_ 6437 6438 def _parse_var_from_options( 6439 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6440 ) -> t.Optional[exp.Var]: 6441 start = self._curr 6442 if not start: 6443 return None 6444 6445 option = start.text.upper() 6446 continuations = options.get(option) 6447 6448 index = self._index 6449 self._advance() 6450 for keywords in continuations or []: 6451 if isinstance(keywords, str): 6452 keywords = (keywords,) 6453 6454 if self._match_text_seq(*keywords): 6455 option = f"{option} {' '.join(keywords)}" 6456 break 6457 else: 6458 if continuations or continuations is None: 6459 if raise_unmatched: 6460 self.raise_error(f"Unknown option {option}") 6461 6462 self._retreat(index) 6463 return None 6464 6465 return exp.var(option) 6466 6467 def _parse_as_command(self, start: Token) -> exp.Command: 6468 while self._curr: 6469 self._advance() 6470 text = self._find_sql(start, self._prev) 6471 size = len(start.text) 6472 self._warn_unsupported() 6473 return exp.Command(this=text[:size], expression=text[size:]) 6474 6475 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6476 settings = [] 6477 6478 self._match_l_paren() 6479 kind = self._parse_id_var() 6480 6481 if self._match(TokenType.L_PAREN): 6482 while True: 6483 key = self._parse_id_var() 6484 value = self._parse_primary() 6485 6486 if not key and value is None: 6487 break 6488 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6489 self._match(TokenType.R_PAREN) 6490 6491 self._match_r_paren() 6492 6493 return self.expression( 6494 exp.DictProperty, 6495 this=this, 6496 kind=kind.this if kind else None, 6497 settings=settings, 6498 ) 6499 6500 def _parse_dict_range(self, this: str) -> exp.DictRange: 6501 self._match_l_paren() 6502 has_min = self._match_text_seq("MIN") 6503 if has_min: 6504 min = self._parse_var() or self._parse_primary() 6505 self._match_text_seq("MAX") 6506 max = self._parse_var() or self._parse_primary() 6507 else: 6508 max = self._parse_var() or self._parse_primary() 6509 min = exp.Literal.number(0) 6510 self._match_r_paren() 6511 return self.expression(exp.DictRange, this=this, min=min, max=max) 6512 6513 def _parse_comprehension( 6514 self, this: t.Optional[exp.Expression] 6515 ) -> t.Optional[exp.Comprehension]: 6516 index = self._index 6517 expression = self._parse_column() 6518 if not 
self._match(TokenType.IN): 6519 self._retreat(index - 1) 6520 return None 6521 iterator = self._parse_column() 6522 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6523 return self.expression( 6524 exp.Comprehension, 6525 this=this, 6526 expression=expression, 6527 iterator=iterator, 6528 condition=condition, 6529 ) 6530 6531 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6532 if self._match(TokenType.HEREDOC_STRING): 6533 return self.expression(exp.Heredoc, this=self._prev.text) 6534 6535 if not self._match_text_seq("$"): 6536 return None 6537 6538 tags = ["$"] 6539 tag_text = None 6540 6541 if self._is_connected(): 6542 self._advance() 6543 tags.append(self._prev.text.upper()) 6544 else: 6545 self.raise_error("No closing $ found") 6546 6547 if tags[-1] != "$": 6548 if self._is_connected() and self._match_text_seq("$"): 6549 tag_text = tags[-1] 6550 tags.append("$") 6551 else: 6552 self.raise_error("No closing $ found") 6553 6554 heredoc_start = self._curr 6555 6556 while self._curr: 6557 if self._match_text_seq(*tags, advance=False): 6558 this = self._find_sql(heredoc_start, self._prev) 6559 self._advance(len(tags)) 6560 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6561 6562 self._advance() 6563 6564 self.raise_error(f"No closing {''.join(tags)} found") 6565 return None 6566 6567 def _find_parser( 6568 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6569 ) -> t.Optional[t.Callable]: 6570 if not self._curr: 6571 return None 6572 6573 index = self._index 6574 this = [] 6575 while True: 6576 # The current token might be multiple words 6577 curr = self._curr.text.upper() 6578 key = curr.split(" ") 6579 this.append(curr) 6580 6581 self._advance() 6582 result, trie = in_trie(trie, key) 6583 if result == TrieResult.FAILED: 6584 break 6585 6586 if result == TrieResult.EXISTS: 6587 subparser = parsers[" ".join(this)] 6588 return subparser 6589 6590 self._retreat(index) 6591 return None 6592 6593 def _match(self, token_type, advance=True, expression=None): 6594 if not self._curr: 6595 return None 6596 6597 if self._curr.token_type == token_type: 6598 if advance: 6599 self._advance() 6600 self._add_comments(expression) 6601 return True 6602 6603 return None 6604 6605 def _match_set(self, types, advance=True): 6606 if not self._curr: 6607 return None 6608 6609 if self._curr.token_type in types: 6610 if advance: 6611 self._advance() 6612 return True 6613 6614 return None 6615 6616 def _match_pair(self, token_type_a, token_type_b, advance=True): 6617 if not self._curr or not self._next: 6618 return None 6619 6620 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6621 if advance: 6622 self._advance(2) 6623 return True 6624 6625 return None 6626 6627 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6628 if not self._match(TokenType.L_PAREN, expression=expression): 6629 self.raise_error("Expecting (") 6630 6631 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6632 if not self._match(TokenType.R_PAREN, expression=expression): 6633 self.raise_error("Expecting )") 6634 6635 def _match_texts(self, texts, advance=True): 6636 if self._curr and self._curr.text.upper() in texts: 6637 if advance: 6638 self._advance() 6639 return True 6640 return None 6641 6642 def _match_text_seq(self, *texts, advance=True): 6643 index = self._index 6644 for text in texts: 6645 if self._curr and self._curr.text.upper() == text: 6646 self._advance() 6647 else: 6648 
self._retreat(index) 6649 return None 6650 6651 if not advance: 6652 self._retreat(index) 6653 6654 return True 6655 6656 def _replace_lambda( 6657 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6658 ) -> t.Optional[exp.Expression]: 6659 if not node: 6660 return node 6661 6662 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6663 6664 for column in node.find_all(exp.Column): 6665 typ = lambda_types.get(column.parts[0].name) 6666 if typ is not None: 6667 dot_or_id = column.to_dot() if column.table else column.this 6668 6669 if typ: 6670 dot_or_id = self.expression( 6671 exp.Cast, 6672 this=dot_or_id, 6673 to=typ, 6674 ) 6675 6676 parent = column.parent 6677 6678 while isinstance(parent, exp.Dot): 6679 if not isinstance(parent.parent, exp.Dot): 6680 parent.replace(dot_or_id) 6681 break 6682 parent = parent.parent 6683 else: 6684 if column is node: 6685 node = dot_or_id 6686 else: 6687 column.replace(dot_or_id) 6688 return node 6689 6690 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6691 start = self._prev 6692 6693 # Not to be confused with TRUNCATE(number, decimals) function call 6694 if self._match(TokenType.L_PAREN): 6695 self._retreat(self._index - 2) 6696 return self._parse_function() 6697 6698 # Clickhouse supports TRUNCATE DATABASE as well 6699 is_database = self._match(TokenType.DATABASE) 6700 6701 self._match(TokenType.TABLE) 6702 6703 exists = self._parse_exists(not_=False) 6704 6705 expressions = self._parse_csv( 6706 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6707 ) 6708 6709 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6710 6711 if self._match_text_seq("RESTART", "IDENTITY"): 6712 identity = "RESTART" 6713 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6714 identity = "CONTINUE" 6715 else: 6716 identity = None 6717 6718 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6719 option = self._prev.text 6720 else: 6721 option = None 6722 6723 partition = self._parse_partition() 6724 6725 # Fallback case 6726 if self._curr: 6727 return self._parse_as_command(start) 6728 6729 return self.expression( 6730 exp.TruncateTable, 6731 expressions=expressions, 6732 is_database=is_database, 6733 exists=exists, 6734 cluster=cluster, 6735 identity=identity, 6736 option=option, 6737 partition=partition, 6738 ) 6739 6740 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6741 this = self._parse_ordered(self._parse_opclass) 6742 6743 if not self._match(TokenType.WITH): 6744 return this 6745 6746 op = self._parse_var(any_token=True) 6747 6748 return self.expression(exp.WithOperator, this=this, op=op) 6749 6750 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6751 self._match(TokenType.EQ) 6752 self._match(TokenType.L_PAREN) 6753 6754 opts: t.List[t.Optional[exp.Expression]] = [] 6755 while self._curr and not self._match(TokenType.R_PAREN): 6756 if self._match_text_seq("FORMAT_NAME", "="): 6757 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6758 # so we parse it separately to use _parse_field() 6759 prop = self.expression( 6760 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6761 ) 6762 opts.append(prop) 6763 else: 6764 opts.append(self._parse_property()) 6765 6766 self._match(TokenType.COMMA) 6767 6768 return opts 6769 6770 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6771 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6772 6773 options = [] 6774 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6775 option = self._parse_var(any_token=True) 6776 prev = self._prev.text.upper() 6777 6778 # Different dialects might separate options and values by white space, "=" and "AS" 6779 self._match(TokenType.EQ) 6780 self._match(TokenType.ALIAS) 6781 6782 param = self.expression(exp.CopyParameter, this=option) 6783 6784 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6785 TokenType.L_PAREN, advance=False 6786 ): 6787 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6788 param.set("expressions", self._parse_wrapped_options()) 6789 elif prev == "FILE_FORMAT": 6790 # T-SQL's external file format case 6791 param.set("expression", self._parse_field()) 6792 else: 6793 param.set("expression", self._parse_unquoted_field()) 6794 6795 options.append(param) 6796 self._match(sep) 6797 6798 return options 6799 6800 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6801 expr = self.expression(exp.Credentials) 6802 6803 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6804 expr.set("storage", self._parse_field()) 6805 if self._match_text_seq("CREDENTIALS"): 6806 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6807 creds = ( 6808 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6809 ) 6810 expr.set("credentials", creds) 6811 if self._match_text_seq("ENCRYPTION"): 6812 expr.set("encryption", self._parse_wrapped_options()) 6813 if self._match_text_seq("IAM_ROLE"): 6814 expr.set("iam_role", self._parse_field()) 6815 if self._match_text_seq("REGION"): 6816 expr.set("region", self._parse_field()) 6817 6818 return expr 6819 6820 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6821 return self._parse_field() 6822 6823 def _parse_copy(self) -> exp.Copy | exp.Command: 6824 start = self._prev 6825 6826 self._match(TokenType.INTO) 6827 6828 this = ( 6829 self._parse_select(nested=True, parse_subquery_alias=False) 6830 if self._match(TokenType.L_PAREN, advance=False) 6831 else self._parse_table(schema=True) 6832 ) 6833 6834 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6835 6836 files = self._parse_csv(self._parse_file_location) 6837 credentials = self._parse_credentials() 6838 6839 self._match_text_seq("WITH") 6840 6841 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6842 6843 # Fallback case 6844 if self._curr: 6845 return self._parse_as_command(start) 6846 6847 return self.expression( 6848 exp.Copy, 6849 this=this, 6850 kind=kind, 6851 credentials=credentials, 6852 files=files, 6853 params=params, 6854 )
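A minimal usage sketch (not part of the module source): the private _parse_*
methods above are exercised indirectly through sqlglot's public parse_one
entry point, for instance:

    import sqlglot

    # DECODE(...) is rewritten into a CASE expression by _parse_decode
    sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t").sql()
    # SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t

    # STRING_AGG is parsed into exp.GroupConcat by _parse_string_agg, which
    # lets it transpile to MySQL's GROUP_CONCAT
    sqlglot.parse_one("SELECT STRING_AGG(x, ',') FROM t", read="postgres").sql(dialect="mysql")
    # SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t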
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "HEX": build_hex, 155 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 156 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 157 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 158 "LIKE": build_like, 159 "LOG": build_logarithm, 160 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 161 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 162 "LOWER": build_lower, 163 "MOD": build_mod, 164 "TIME_TO_TIME_STR": lambda args: exp.Cast( 165 this=seq_get(args, 0), 166 to=exp.DataType(this=exp.DataType.Type.TEXT), 167 ), 168 "TO_HEX": build_hex, 169 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 170 this=exp.Cast( 171 this=seq_get(args, 0), 172 to=exp.DataType(this=exp.DataType.Type.TEXT), 173 ), 174 start=exp.Literal.number(1), 175 length=exp.Literal.number(10), 176 ), 177 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 178 "UPPER": build_upper, 179 "VAR_MAP": build_var_map, 180 } 181 182 NO_PAREN_FUNCTIONS = { 183 TokenType.CURRENT_DATE: exp.CurrentDate, 184 TokenType.CURRENT_DATETIME: exp.CurrentDate, 185 TokenType.CURRENT_TIME: exp.CurrentTime, 186 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 187 TokenType.CURRENT_USER: exp.CurrentUser, 188 } 189 190 STRUCT_TYPE_TOKENS = { 191 TokenType.NESTED, 192 TokenType.OBJECT, 193 TokenType.STRUCT, 194 } 195 196 NESTED_TYPE_TOKENS = { 197 TokenType.ARRAY, 198 TokenType.LIST, 199 TokenType.LOWCARDINALITY, 200 TokenType.MAP, 201 TokenType.NULLABLE, 202 *STRUCT_TYPE_TOKENS, 203 } 204 205 ENUM_TYPE_TOKENS = { 206 TokenType.ENUM, 207 TokenType.ENUM8, 208 TokenType.ENUM16, 209 } 210 211 AGGREGATE_TYPE_TOKENS = { 212 TokenType.AGGREGATEFUNCTION, 213 TokenType.SIMPLEAGGREGATEFUNCTION, 214 } 215 216 TYPE_TOKENS = { 217 TokenType.BIT, 218 TokenType.BOOLEAN, 219 TokenType.TINYINT, 220 TokenType.UTINYINT, 221 TokenType.SMALLINT, 222 TokenType.USMALLINT, 223 TokenType.INT, 224 TokenType.UINT, 225 TokenType.BIGINT, 226 TokenType.UBIGINT, 227 TokenType.INT128, 228 TokenType.UINT128, 229 TokenType.INT256, 230 TokenType.UINT256, 231 TokenType.MEDIUMINT, 232 TokenType.UMEDIUMINT, 233 TokenType.FIXEDSTRING, 234 TokenType.FLOAT, 235 TokenType.DOUBLE, 236 TokenType.CHAR, 237 
TokenType.NCHAR, 238 TokenType.VARCHAR, 239 TokenType.NVARCHAR, 240 TokenType.BPCHAR, 241 TokenType.TEXT, 242 TokenType.MEDIUMTEXT, 243 TokenType.LONGTEXT, 244 TokenType.MEDIUMBLOB, 245 TokenType.LONGBLOB, 246 TokenType.BINARY, 247 TokenType.VARBINARY, 248 TokenType.JSON, 249 TokenType.JSONB, 250 TokenType.INTERVAL, 251 TokenType.TINYBLOB, 252 TokenType.TINYTEXT, 253 TokenType.TIME, 254 TokenType.TIMETZ, 255 TokenType.TIMESTAMP, 256 TokenType.TIMESTAMP_S, 257 TokenType.TIMESTAMP_MS, 258 TokenType.TIMESTAMP_NS, 259 TokenType.TIMESTAMPTZ, 260 TokenType.TIMESTAMPLTZ, 261 TokenType.TIMESTAMPNTZ, 262 TokenType.DATETIME, 263 TokenType.DATETIME64, 264 TokenType.DATE, 265 TokenType.DATE32, 266 TokenType.INT4RANGE, 267 TokenType.INT4MULTIRANGE, 268 TokenType.INT8RANGE, 269 TokenType.INT8MULTIRANGE, 270 TokenType.NUMRANGE, 271 TokenType.NUMMULTIRANGE, 272 TokenType.TSRANGE, 273 TokenType.TSMULTIRANGE, 274 TokenType.TSTZRANGE, 275 TokenType.TSTZMULTIRANGE, 276 TokenType.DATERANGE, 277 TokenType.DATEMULTIRANGE, 278 TokenType.DECIMAL, 279 TokenType.UDECIMAL, 280 TokenType.BIGDECIMAL, 281 TokenType.UUID, 282 TokenType.GEOGRAPHY, 283 TokenType.GEOMETRY, 284 TokenType.HLLSKETCH, 285 TokenType.HSTORE, 286 TokenType.PSEUDO_TYPE, 287 TokenType.SUPER, 288 TokenType.SERIAL, 289 TokenType.SMALLSERIAL, 290 TokenType.BIGSERIAL, 291 TokenType.XML, 292 TokenType.YEAR, 293 TokenType.UNIQUEIDENTIFIER, 294 TokenType.USERDEFINED, 295 TokenType.MONEY, 296 TokenType.SMALLMONEY, 297 TokenType.ROWVERSION, 298 TokenType.IMAGE, 299 TokenType.VARIANT, 300 TokenType.OBJECT, 301 TokenType.OBJECT_IDENTIFIER, 302 TokenType.INET, 303 TokenType.IPADDRESS, 304 TokenType.IPPREFIX, 305 TokenType.IPV4, 306 TokenType.IPV6, 307 TokenType.UNKNOWN, 308 TokenType.NULL, 309 TokenType.NAME, 310 TokenType.TDIGEST, 311 *ENUM_TYPE_TOKENS, 312 *NESTED_TYPE_TOKENS, 313 *AGGREGATE_TYPE_TOKENS, 314 } 315 316 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 317 TokenType.BIGINT: TokenType.UBIGINT, 318 TokenType.INT: TokenType.UINT, 319 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 320 TokenType.SMALLINT: TokenType.USMALLINT, 321 TokenType.TINYINT: TokenType.UTINYINT, 322 TokenType.DECIMAL: TokenType.UDECIMAL, 323 } 324 325 SUBQUERY_PREDICATES = { 326 TokenType.ANY: exp.Any, 327 TokenType.ALL: exp.All, 328 TokenType.EXISTS: exp.Exists, 329 TokenType.SOME: exp.Any, 330 } 331 332 RESERVED_TOKENS = { 333 *Tokenizer.SINGLE_TOKENS.values(), 334 TokenType.SELECT, 335 } - {TokenType.IDENTIFIER} 336 337 DB_CREATABLES = { 338 TokenType.DATABASE, 339 TokenType.DICTIONARY, 340 TokenType.MODEL, 341 TokenType.SCHEMA, 342 TokenType.SEQUENCE, 343 TokenType.STORAGE_INTEGRATION, 344 TokenType.TABLE, 345 TokenType.TAG, 346 TokenType.VIEW, 347 TokenType.WAREHOUSE, 348 TokenType.STREAMLIT, 349 } 350 351 CREATABLES = { 352 TokenType.COLUMN, 353 TokenType.CONSTRAINT, 354 TokenType.FOREIGN_KEY, 355 TokenType.FUNCTION, 356 TokenType.INDEX, 357 TokenType.PROCEDURE, 358 *DB_CREATABLES, 359 } 360 361 # Tokens that can represent identifiers 362 ID_VAR_TOKENS = { 363 TokenType.VAR, 364 TokenType.ANTI, 365 TokenType.APPLY, 366 TokenType.ASC, 367 TokenType.ASOF, 368 TokenType.AUTO_INCREMENT, 369 TokenType.BEGIN, 370 TokenType.BPCHAR, 371 TokenType.CACHE, 372 TokenType.CASE, 373 TokenType.COLLATE, 374 TokenType.COMMAND, 375 TokenType.COMMENT, 376 TokenType.COMMIT, 377 TokenType.CONSTRAINT, 378 TokenType.COPY, 379 TokenType.DEFAULT, 380 TokenType.DELETE, 381 TokenType.DESC, 382 TokenType.DESCRIBE, 383 TokenType.DICTIONARY, 384 TokenType.DIV, 385 TokenType.END, 386 TokenType.EXECUTE, 387 
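# ID_VAR_TOKENS (the set continues below) lists the soft keywords that may
# still be used as identifiers, which is why names like "cache" or "format"
# keep parsing. A doctest-style sketch, assuming the default dialect:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT cache FROM format").sql()
#     'SELECT cache FROM format'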
TokenType.ESCAPE, 388 TokenType.FALSE, 389 TokenType.FIRST, 390 TokenType.FILTER, 391 TokenType.FINAL, 392 TokenType.FORMAT, 393 TokenType.FULL, 394 TokenType.IDENTIFIER, 395 TokenType.IS, 396 TokenType.ISNULL, 397 TokenType.INTERVAL, 398 TokenType.KEEP, 399 TokenType.KILL, 400 TokenType.LEFT, 401 TokenType.LOAD, 402 TokenType.MERGE, 403 TokenType.NATURAL, 404 TokenType.NEXT, 405 TokenType.OFFSET, 406 TokenType.OPERATOR, 407 TokenType.ORDINALITY, 408 TokenType.OVERLAPS, 409 TokenType.OVERWRITE, 410 TokenType.PARTITION, 411 TokenType.PERCENT, 412 TokenType.PIVOT, 413 TokenType.PRAGMA, 414 TokenType.RANGE, 415 TokenType.RECURSIVE, 416 TokenType.REFERENCES, 417 TokenType.REFRESH, 418 TokenType.REPLACE, 419 TokenType.RIGHT, 420 TokenType.ROLLUP, 421 TokenType.ROW, 422 TokenType.ROWS, 423 TokenType.SEMI, 424 TokenType.SET, 425 TokenType.SETTINGS, 426 TokenType.SHOW, 427 TokenType.TEMPORARY, 428 TokenType.TOP, 429 TokenType.TRUE, 430 TokenType.TRUNCATE, 431 TokenType.UNIQUE, 432 TokenType.UNNEST, 433 TokenType.UNPIVOT, 434 TokenType.UPDATE, 435 TokenType.USE, 436 TokenType.VOLATILE, 437 TokenType.WINDOW, 438 *CREATABLES, 439 *SUBQUERY_PREDICATES, 440 *TYPE_TOKENS, 441 *NO_PAREN_FUNCTIONS, 442 } 443 444 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 445 446 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 447 TokenType.ANTI, 448 TokenType.APPLY, 449 TokenType.ASOF, 450 TokenType.FULL, 451 TokenType.LEFT, 452 TokenType.LOCK, 453 TokenType.NATURAL, 454 TokenType.OFFSET, 455 TokenType.RIGHT, 456 TokenType.SEMI, 457 TokenType.WINDOW, 458 } 459 460 ALIAS_TOKENS = ID_VAR_TOKENS 461 462 ARRAY_CONSTRUCTORS = { 463 "ARRAY": exp.Array, 464 "LIST": exp.List, 465 } 466 467 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 468 469 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 470 471 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 472 473 FUNC_TOKENS = { 474 TokenType.COLLATE, 475 TokenType.COMMAND, 476 TokenType.CURRENT_DATE, 477 TokenType.CURRENT_DATETIME, 478 TokenType.CURRENT_TIMESTAMP, 479 TokenType.CURRENT_TIME, 480 TokenType.CURRENT_USER, 481 TokenType.FILTER, 482 TokenType.FIRST, 483 TokenType.FORMAT, 484 TokenType.GLOB, 485 TokenType.IDENTIFIER, 486 TokenType.INDEX, 487 TokenType.ISNULL, 488 TokenType.ILIKE, 489 TokenType.INSERT, 490 TokenType.LIKE, 491 TokenType.MERGE, 492 TokenType.OFFSET, 493 TokenType.PRIMARY_KEY, 494 TokenType.RANGE, 495 TokenType.REPLACE, 496 TokenType.RLIKE, 497 TokenType.ROW, 498 TokenType.UNNEST, 499 TokenType.VAR, 500 TokenType.LEFT, 501 TokenType.RIGHT, 502 TokenType.SEQUENCE, 503 TokenType.DATE, 504 TokenType.DATETIME, 505 TokenType.TABLE, 506 TokenType.TIMESTAMP, 507 TokenType.TIMESTAMPTZ, 508 TokenType.TRUNCATE, 509 TokenType.WINDOW, 510 TokenType.XOR, 511 *TYPE_TOKENS, 512 *SUBQUERY_PREDICATES, 513 } 514 515 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 516 TokenType.AND: exp.And, 517 } 518 519 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 520 TokenType.COLON_EQ: exp.PropertyEQ, 521 } 522 523 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 524 TokenType.OR: exp.Or, 525 } 526 527 EQUALITY = { 528 TokenType.EQ: exp.EQ, 529 TokenType.NEQ: exp.NEQ, 530 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 531 } 532 533 COMPARISON = { 534 TokenType.GT: exp.GT, 535 TokenType.GTE: exp.GTE, 536 TokenType.LT: exp.LT, 537 TokenType.LTE: exp.LTE, 538 } 539 540 BITWISE = { 541 TokenType.AMP: exp.BitwiseAnd, 542 TokenType.CARET: exp.BitwiseXor, 543 TokenType.PIPE: exp.BitwiseOr, 544 } 545 546 TERM = { 547 TokenType.DASH: exp.Sub, 548 
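# The operator tables CONJUNCTION, DISJUNCTION, EQUALITY, COMPARISON, BITWISE,
# TERM and FACTOR (TERM continues below) each back one level of a classic
# precedence-climbing parser: when a level matches one of its tokens, it builds
# the mapped Expression node, with tighter-binding levels parsed first as the
# operands. For example, "a + b * c" becomes Add(this=a, expression=Mul(...))
# because FACTOR (Mul) binds tighter than TERM (Add). A sketch, assuming the
# default dialect:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("a + b * c")).__name__
#     'Add'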
TokenType.PLUS: exp.Add, 549 TokenType.MOD: exp.Mod, 550 TokenType.COLLATE: exp.Collate, 551 } 552 553 FACTOR = { 554 TokenType.DIV: exp.IntDiv, 555 TokenType.LR_ARROW: exp.Distance, 556 TokenType.SLASH: exp.Div, 557 TokenType.STAR: exp.Mul, 558 } 559 560 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 561 562 TIMES = { 563 TokenType.TIME, 564 TokenType.TIMETZ, 565 } 566 567 TIMESTAMPS = { 568 TokenType.TIMESTAMP, 569 TokenType.TIMESTAMPTZ, 570 TokenType.TIMESTAMPLTZ, 571 *TIMES, 572 } 573 574 SET_OPERATIONS = { 575 TokenType.UNION, 576 TokenType.INTERSECT, 577 TokenType.EXCEPT, 578 } 579 580 JOIN_METHODS = { 581 TokenType.ASOF, 582 TokenType.NATURAL, 583 TokenType.POSITIONAL, 584 } 585 586 JOIN_SIDES = { 587 TokenType.LEFT, 588 TokenType.RIGHT, 589 TokenType.FULL, 590 } 591 592 JOIN_KINDS = { 593 TokenType.ANTI, 594 TokenType.CROSS, 595 TokenType.INNER, 596 TokenType.OUTER, 597 TokenType.SEMI, 598 TokenType.STRAIGHT_JOIN, 599 } 600 601 JOIN_HINTS: t.Set[str] = set() 602 603 LAMBDAS = { 604 TokenType.ARROW: lambda self, expressions: self.expression( 605 exp.Lambda, 606 this=self._replace_lambda( 607 self._parse_assignment(), 608 expressions, 609 ), 610 expressions=expressions, 611 ), 612 TokenType.FARROW: lambda self, expressions: self.expression( 613 exp.Kwarg, 614 this=exp.var(expressions[0].name), 615 expression=self._parse_assignment(), 616 ), 617 } 618 619 COLUMN_OPERATORS = { 620 TokenType.DOT: None, 621 TokenType.DCOLON: lambda self, this, to: self.expression( 622 exp.Cast if self.STRICT_CAST else exp.TryCast, 623 this=this, 624 to=to, 625 ), 626 TokenType.ARROW: lambda self, this, path: self.expression( 627 exp.JSONExtract, 628 this=this, 629 expression=self.dialect.to_json_path(path), 630 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 631 ), 632 TokenType.DARROW: lambda self, this, path: self.expression( 633 exp.JSONExtractScalar, 634 this=this, 635 expression=self.dialect.to_json_path(path), 636 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 637 ), 638 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 639 exp.JSONBExtract, 640 this=this, 641 expression=path, 642 ), 643 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 644 exp.JSONBExtractScalar, 645 this=this, 646 expression=path, 647 ), 648 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 649 exp.JSONBContains, 650 this=this, 651 expression=key, 652 ), 653 } 654 655 EXPRESSION_PARSERS = { 656 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 657 exp.Column: lambda self: self._parse_column(), 658 exp.Condition: lambda self: self._parse_assignment(), 659 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 660 exp.Expression: lambda self: self._parse_expression(), 661 exp.From: lambda self: self._parse_from(joins=True), 662 exp.Group: lambda self: self._parse_group(), 663 exp.Having: lambda self: self._parse_having(), 664 exp.Identifier: lambda self: self._parse_id_var(), 665 exp.Join: lambda self: self._parse_join(), 666 exp.Lambda: lambda self: self._parse_lambda(), 667 exp.Lateral: lambda self: self._parse_lateral(), 668 exp.Limit: lambda self: self._parse_limit(), 669 exp.Offset: lambda self: self._parse_offset(), 670 exp.Order: lambda self: self._parse_order(), 671 exp.Ordered: lambda self: self._parse_ordered(), 672 exp.Properties: lambda self: self._parse_properties(), 673 exp.Qualify: lambda self: self._parse_qualify(), 674 exp.Returning: lambda self: self._parse_returning(), 675 exp.Sort: lambda self: 
self._parse_sort(exp.Sort, TokenType.SORT_BY), 676 exp.Table: lambda self: self._parse_table_parts(), 677 exp.TableAlias: lambda self: self._parse_table_alias(), 678 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 679 exp.Where: lambda self: self._parse_where(), 680 exp.Window: lambda self: self._parse_named_window(), 681 exp.With: lambda self: self._parse_with(), 682 "JOIN_TYPE": lambda self: self._parse_join_parts(), 683 } 684 685 STATEMENT_PARSERS = { 686 TokenType.ALTER: lambda self: self._parse_alter(), 687 TokenType.BEGIN: lambda self: self._parse_transaction(), 688 TokenType.CACHE: lambda self: self._parse_cache(), 689 TokenType.COMMENT: lambda self: self._parse_comment(), 690 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 691 TokenType.COPY: lambda self: self._parse_copy(), 692 TokenType.CREATE: lambda self: self._parse_create(), 693 TokenType.DELETE: lambda self: self._parse_delete(), 694 TokenType.DESC: lambda self: self._parse_describe(), 695 TokenType.DESCRIBE: lambda self: self._parse_describe(), 696 TokenType.DROP: lambda self: self._parse_drop(), 697 TokenType.INSERT: lambda self: self._parse_insert(), 698 TokenType.KILL: lambda self: self._parse_kill(), 699 TokenType.LOAD: lambda self: self._parse_load(), 700 TokenType.MERGE: lambda self: self._parse_merge(), 701 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 702 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 703 TokenType.REFRESH: lambda self: self._parse_refresh(), 704 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 705 TokenType.SET: lambda self: self._parse_set(), 706 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 707 TokenType.UNCACHE: lambda self: self._parse_uncache(), 708 TokenType.UPDATE: lambda self: self._parse_update(), 709 TokenType.USE: lambda self: self.expression( 710 exp.Use, 711 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 712 this=self._parse_table(schema=False), 713 ), 714 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 715 } 716 717 UNARY_PARSERS = { 718 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 719 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 720 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 721 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 722 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 723 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 724 } 725 726 STRING_PARSERS = { 727 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 728 exp.RawString, this=token.text 729 ), 730 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 731 exp.National, this=token.text 732 ), 733 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 734 TokenType.STRING: lambda self, token: self.expression( 735 exp.Literal, this=token.text, is_string=True 736 ), 737 TokenType.UNICODE_STRING: lambda self, token: self.expression( 738 exp.UnicodeString, 739 this=token.text, 740 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 741 ), 742 } 743 744 NUMERIC_PARSERS = { 745 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 746 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 
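# STATEMENT_PARSERS above dispatches on the first significant token of each
# statement; anything unmatched falls back to the generic expression/select
# path in _parse_statement, or to exp.Command for tokenizer-level COMMANDS. A
# doctest-style sketch, assuming the default dialect (the USE entry above
# builds exp.Use directly; "my_db" is just an illustrative name):
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("USE my_db")).__name__
#     'Use'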
747 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 748 TokenType.NUMBER: lambda self, token: self.expression( 749 exp.Literal, this=token.text, is_string=False 750 ), 751 } 752 753 PRIMARY_PARSERS = { 754 **STRING_PARSERS, 755 **NUMERIC_PARSERS, 756 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 757 TokenType.NULL: lambda self, _: self.expression(exp.Null), 758 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 759 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 760 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 761 TokenType.STAR: lambda self, _: self.expression( 762 exp.Star, 763 **{ 764 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 765 "replace": self._parse_star_op("REPLACE"), 766 "rename": self._parse_star_op("RENAME"), 767 }, 768 ), 769 } 770 771 PLACEHOLDER_PARSERS = { 772 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 773 TokenType.PARAMETER: lambda self: self._parse_parameter(), 774 TokenType.COLON: lambda self: ( 775 self.expression(exp.Placeholder, this=self._prev.text) 776 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 777 else None 778 ), 779 } 780 781 RANGE_PARSERS = { 782 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 783 TokenType.GLOB: binary_range_parser(exp.Glob), 784 TokenType.ILIKE: binary_range_parser(exp.ILike), 785 TokenType.IN: lambda self, this: self._parse_in(this), 786 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 787 TokenType.IS: lambda self, this: self._parse_is(this), 788 TokenType.LIKE: binary_range_parser(exp.Like), 789 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 790 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 791 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 792 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 793 } 794 795 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 796 "ALLOWED_VALUES": lambda self: self.expression( 797 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 798 ), 799 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 800 "AUTO": lambda self: self._parse_auto_property(), 801 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 802 "BACKUP": lambda self: self.expression( 803 exp.BackupProperty, this=self._parse_var(any_token=True) 804 ), 805 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 806 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 807 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 808 "CHECKSUM": lambda self: self._parse_checksum(), 809 "CLUSTER BY": lambda self: self._parse_cluster(), 810 "CLUSTERED": lambda self: self._parse_clustered_by(), 811 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 812 exp.CollateProperty, **kwargs 813 ), 814 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 815 "CONTAINS": lambda self: self._parse_contains_property(), 816 "COPY": lambda self: self._parse_copy_property(), 817 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 818 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 819 "DEFINER": lambda self: self._parse_definer(), 820 "DETERMINISTIC": lambda self: self.expression( 821 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 822 ), 823 "DISTKEY": 
lambda self: self._parse_distkey(), 824 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 825 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 826 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 827 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 828 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 829 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 830 "FREESPACE": lambda self: self._parse_freespace(), 831 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 832 "HEAP": lambda self: self.expression(exp.HeapProperty), 833 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 834 "IMMUTABLE": lambda self: self.expression( 835 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 836 ), 837 "INHERITS": lambda self: self.expression( 838 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 839 ), 840 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 841 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 842 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 843 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 844 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 845 "LIKE": lambda self: self._parse_create_like(), 846 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 847 "LOCK": lambda self: self._parse_locking(), 848 "LOCKING": lambda self: self._parse_locking(), 849 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 850 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 851 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 852 "MODIFIES": lambda self: self._parse_modifies_property(), 853 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 854 "NO": lambda self: self._parse_no_property(), 855 "ON": lambda self: self._parse_on_property(), 856 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 857 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 858 "PARTITION": lambda self: self._parse_partitioned_of(), 859 "PARTITION BY": lambda self: self._parse_partitioned_by(), 860 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 861 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 862 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 863 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 864 "READS": lambda self: self._parse_reads_property(), 865 "REMOTE": lambda self: self._parse_remote_with_connection(), 866 "RETURNS": lambda self: self._parse_returns(), 867 "STRICT": lambda self: self.expression(exp.StrictProperty), 868 "ROW": lambda self: self._parse_row(), 869 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 870 "SAMPLE": lambda self: self.expression( 871 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 872 ), 873 "SECURE": lambda self: self.expression(exp.SecureProperty), 874 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 875 "SETTINGS": lambda self: self.expression( 876 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 877 ), 878 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 879 "SORTKEY": lambda self: self._parse_sortkey(), 880 "SOURCE": 
lambda self: self._parse_dict_property(this="SOURCE"), 881 "STABLE": lambda self: self.expression( 882 exp.StabilityProperty, this=exp.Literal.string("STABLE") 883 ), 884 "STORED": lambda self: self._parse_stored(), 885 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 886 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 887 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 888 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 889 "TO": lambda self: self._parse_to_table(), 890 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 891 "TRANSFORM": lambda self: self.expression( 892 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 893 ), 894 "TTL": lambda self: self._parse_ttl(), 895 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 896 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 897 "VOLATILE": lambda self: self._parse_volatile_property(), 898 "WITH": lambda self: self._parse_with_property(), 899 } 900 901 CONSTRAINT_PARSERS = { 902 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 903 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 904 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 905 "CHARACTER SET": lambda self: self.expression( 906 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 907 ), 908 "CHECK": lambda self: self.expression( 909 exp.CheckColumnConstraint, 910 this=self._parse_wrapped(self._parse_assignment), 911 enforced=self._match_text_seq("ENFORCED"), 912 ), 913 "COLLATE": lambda self: self.expression( 914 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 915 ), 916 "COMMENT": lambda self: self.expression( 917 exp.CommentColumnConstraint, this=self._parse_string() 918 ), 919 "COMPRESS": lambda self: self._parse_compress(), 920 "CLUSTERED": lambda self: self.expression( 921 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 922 ), 923 "NONCLUSTERED": lambda self: self.expression( 924 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 925 ), 926 "DEFAULT": lambda self: self.expression( 927 exp.DefaultColumnConstraint, this=self._parse_bitwise() 928 ), 929 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 930 "EPHEMERAL": lambda self: self.expression( 931 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 932 ), 933 "EXCLUDE": lambda self: self.expression( 934 exp.ExcludeColumnConstraint, this=self._parse_index_params() 935 ), 936 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 937 "FORMAT": lambda self: self.expression( 938 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 939 ), 940 "GENERATED": lambda self: self._parse_generated_as_identity(), 941 "IDENTITY": lambda self: self._parse_auto_increment(), 942 "INLINE": lambda self: self._parse_inline(), 943 "LIKE": lambda self: self._parse_create_like(), 944 "NOT": lambda self: self._parse_not_constraint(), 945 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 946 "ON": lambda self: ( 947 self._match(TokenType.UPDATE) 948 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 949 ) 950 or self.expression(exp.OnProperty, this=self._parse_id_var()), 951 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 952 "PERIOD": lambda self: 
self._parse_period_for_system_time(), 953 "PRIMARY KEY": lambda self: self._parse_primary_key(), 954 "REFERENCES": lambda self: self._parse_references(match=False), 955 "TITLE": lambda self: self.expression( 956 exp.TitleColumnConstraint, this=self._parse_var_or_string() 957 ), 958 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 959 "UNIQUE": lambda self: self._parse_unique(), 960 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 961 "WITH": lambda self: self.expression( 962 exp.Properties, expressions=self._parse_wrapped_properties() 963 ), 964 } 965 966 ALTER_PARSERS = { 967 "ADD": lambda self: self._parse_alter_table_add(), 968 "ALTER": lambda self: self._parse_alter_table_alter(), 969 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 970 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 971 "DROP": lambda self: self._parse_alter_table_drop(), 972 "RENAME": lambda self: self._parse_alter_table_rename(), 973 "SET": lambda self: self._parse_alter_table_set(), 974 } 975 976 ALTER_ALTER_PARSERS = { 977 "DISTKEY": lambda self: self._parse_alter_diststyle(), 978 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 979 "SORTKEY": lambda self: self._parse_alter_sortkey(), 980 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 981 } 982 983 SCHEMA_UNNAMED_CONSTRAINTS = { 984 "CHECK", 985 "EXCLUDE", 986 "FOREIGN KEY", 987 "LIKE", 988 "PERIOD", 989 "PRIMARY KEY", 990 "UNIQUE", 991 } 992 993 NO_PAREN_FUNCTION_PARSERS = { 994 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 995 "CASE": lambda self: self._parse_case(), 996 "IF": lambda self: self._parse_if(), 997 "NEXT": lambda self: self._parse_next_value_for(), 998 } 999 1000 INVALID_FUNC_NAME_TOKENS = { 1001 TokenType.IDENTIFIER, 1002 TokenType.STRING, 1003 } 1004 1005 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1006 1007 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1008 1009 FUNCTION_PARSERS = { 1010 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1011 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1012 "DECODE": lambda self: self._parse_decode(), 1013 "EXTRACT": lambda self: self._parse_extract(), 1014 "GAP_FILL": lambda self: self._parse_gap_fill(), 1015 "JSON_OBJECT": lambda self: self._parse_json_object(), 1016 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1017 "JSON_TABLE": lambda self: self._parse_json_table(), 1018 "MATCH": lambda self: self._parse_match_against(), 1019 "OPENJSON": lambda self: self._parse_open_json(), 1020 "POSITION": lambda self: self._parse_position(), 1021 "PREDICT": lambda self: self._parse_predict(), 1022 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1023 "STRING_AGG": lambda self: self._parse_string_agg(), 1024 "SUBSTRING": lambda self: self._parse_substring(), 1025 "TRIM": lambda self: self._parse_trim(), 1026 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1027 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1028 } 1029 1030 QUERY_MODIFIER_PARSERS = { 1031 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1032 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1033 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1034 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1035 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1036 TokenType.QUALIFY: lambda 
self: ("qualify", self._parse_qualify()), 1037 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1038 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1039 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1040 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1041 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1042 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1043 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1044 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1045 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1046 TokenType.CLUSTER_BY: lambda self: ( 1047 "cluster", 1048 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1049 ), 1050 TokenType.DISTRIBUTE_BY: lambda self: ( 1051 "distribute", 1052 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1053 ), 1054 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1055 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1056 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1057 } 1058 1059 SET_PARSERS = { 1060 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1061 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1062 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1063 "TRANSACTION": lambda self: self._parse_set_transaction(), 1064 } 1065 1066 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1067 1068 TYPE_LITERAL_PARSERS = { 1069 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1070 } 1071 1072 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1073 1074 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1075 1076 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1077 1078 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1079 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1080 "ISOLATION": ( 1081 ("LEVEL", "REPEATABLE", "READ"), 1082 ("LEVEL", "READ", "COMMITTED"), 1083 ("LEVEL", "READ", "UNCOMMITTED"), 1084 ("LEVEL", "SERIALIZABLE"), 1085 ), 1086 "READ": ("WRITE", "ONLY"), 1087 } 1088 1089 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1090 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1091 ) 1092 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1093 1094 CREATE_SEQUENCE: OPTIONS_TYPE = { 1095 "SCALE": ("EXTEND", "NOEXTEND"), 1096 "SHARD": ("EXTEND", "NOEXTEND"), 1097 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1098 **dict.fromkeys( 1099 ( 1100 "SESSION", 1101 "GLOBAL", 1102 "KEEP", 1103 "NOKEEP", 1104 "ORDER", 1105 "NOORDER", 1106 "NOCACHE", 1107 "CYCLE", 1108 "NOCYCLE", 1109 "NOMINVALUE", 1110 "NOMAXVALUE", 1111 "NOSCALE", 1112 "NOSHARD", 1113 ), 1114 tuple(), 1115 ), 1116 } 1117 1118 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1119 1120 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1121 1122 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1123 1124 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1125 1126 CLONE_KEYWORDS = {"CLONE", "COPY"} 1127 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1128 1129 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1130 1131
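# Tables annotated with OPTIONS_TYPE map a leading keyword to the keyword
# sequences that may follow it, and are consumed via _parse_var_from_options
# (the USE statement parser above consumes USABLES this way). A doctest-style
# sketch, assuming the default dialect:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED").sql()
#     'SET TRANSACTION ISOLATION LEVEL READ COMMITTED'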
OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1132 1133 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1134 1135 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1136 1137 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1138 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1139 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1140 1141 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1142 1143 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1144 1145 ADD_CONSTRAINT_TOKENS = { 1146 TokenType.CONSTRAINT, 1147 TokenType.FOREIGN_KEY, 1148 TokenType.INDEX, 1149 TokenType.KEY, 1150 TokenType.PRIMARY_KEY, 1151 TokenType.UNIQUE, 1152 } 1153 1154 DISTINCT_TOKENS = {TokenType.DISTINCT} 1155 1156 NULL_TOKENS = {TokenType.NULL} 1157 1158 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1159 1160 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1161 1162 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1163 1164 STRICT_CAST = True 1165 1166 PREFIXED_PIVOT_COLUMNS = False 1167 IDENTIFY_PIVOT_STRINGS = False 1168 1169 LOG_DEFAULTS_TO_LN = False 1170 1171 # Whether ADD is present for each column added by ALTER TABLE 1172 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1173 1174 # Whether the table sample clause expects CSV syntax 1175 TABLESAMPLE_CSV = False 1176 1177 # The default method used for table sampling 1178 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1179 1180 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1181 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1182 1183 # Whether the TRIM function expects the characters to trim as its first argument 1184 TRIM_PATTERN_FIRST = False 1185 1186 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1187 STRING_ALIASES = False 1188 1189 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1190 MODIFIERS_ATTACHED_TO_SET_OP = True 1191 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1192 1193 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1194 NO_PAREN_IF_COMMANDS = True 1195 1196 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1197 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1198 1199 # Whether the `:` operator is used to extract a value from a JSON document 1200 COLON_IS_JSON_EXTRACT = False 1201 1202 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1203 # If this is True and '(' is not found, the keyword will be treated as an identifier 1204 VALUES_FOLLOWED_BY_PAREN = True 1205 1206 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1207 SUPPORTS_IMPLICIT_UNNEST = False 1208 1209 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1210 INTERVAL_SPANS = True 1211 1212 # Whether a PARTITION clause can follow a table reference 1213 SUPPORTS_PARTITION_SELECTION = False 1214 1215 __slots__ = ( 1216 "error_level", 1217 "error_message_context", 1218 "max_errors", 1219 "dialect", 1220 "sql", 1221 "errors", 1222 "_tokens", 1223 "_index", 1224 "_curr", 1225 "_next", 1226 "_prev", 1227 "_prev_comments", 1228 ) 1229 1230 # Autofilled 1231 SHOW_TRIE: t.Dict = {} 1232 SET_TRIE: t.Dict = {} 1233 1234 def __init__( 1235 self, 1236 error_level: t.Optional[ErrorLevel] = None, 1237 error_message_context: int = 100, 1238 max_errors: int = 3, 1239 dialect: DialectType = None, 1240 ): 1241 from sqlglot.dialects import Dialect 1242 1243 self.error_level = error_level or ErrorLevel.IMMEDIATE 1244 self.error_message_context = error_message_context 1245 self.max_errors = max_errors 1246 self.dialect = Dialect.get_or_raise(dialect) 1247 self.reset() 1248 1249 def reset(self): 1250 self.sql = "" 1251 self.errors = [] 1252 self._tokens = [] 1253 self._index = 0 1254 self._curr = None 1255 self._next = None 1256 self._prev = None 1257 self._prev_comments = None 1258 1259 def parse( 1260 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1261 ) -> t.List[t.Optional[exp.Expression]]: 1262 """ 1263 Parses a list of tokens and returns a list of syntax trees, one tree 1264 per parsed SQL statement. 1265 1266 Args: 1267 raw_tokens: The list of tokens. 1268 sql: The original SQL string, used to produce helpful debug messages. 1269 1270 Returns: 1271 The list of the produced syntax trees. 1272 """ 1273 return self._parse( 1274 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1275 ) 1276 1277 def parse_into( 1278 self, 1279 expression_types: exp.IntoType, 1280 raw_tokens: t.List[Token], 1281 sql: t.Optional[str] = None, 1282 ) -> t.List[t.Optional[exp.Expression]]: 1283 """ 1284 Parses a list of tokens into a given Expression type. If a collection of Expression 1285 types is given instead, this method will try to parse the token list into each one 1286 of them, stopping at the first for which the parsing succeeds. 1287 1288 Args: 1289 expression_types: The expression type(s) to try and parse the token list into. 1290 raw_tokens: The list of tokens. 1291 sql: The original SQL string, used to produce helpful debug messages. 1292 1293 Returns: 1294 The target Expression. 
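        Example:
            A doctest-style sketch, assuming the default dialect:

            >>> tokens = Tokenizer().tokenize("x > 1")
            >>> Parser().parse_into(exp.Condition, tokens)[0].sql()
            'x > 1'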
1295 """ 1296 errors = [] 1297 for expression_type in ensure_list(expression_types): 1298 parser = self.EXPRESSION_PARSERS.get(expression_type) 1299 if not parser: 1300 raise TypeError(f"No parser registered for {expression_type}") 1301 1302 try: 1303 return self._parse(parser, raw_tokens, sql) 1304 except ParseError as e: 1305 e.errors[0]["into_expression"] = expression_type 1306 errors.append(e) 1307 1308 raise ParseError( 1309 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1310 errors=merge_errors(errors), 1311 ) from errors[-1] 1312 1313 def _parse( 1314 self, 1315 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1316 raw_tokens: t.List[Token], 1317 sql: t.Optional[str] = None, 1318 ) -> t.List[t.Optional[exp.Expression]]: 1319 self.reset() 1320 self.sql = sql or "" 1321 1322 total = len(raw_tokens) 1323 chunks: t.List[t.List[Token]] = [[]] 1324 1325 for i, token in enumerate(raw_tokens): 1326 if token.token_type == TokenType.SEMICOLON: 1327 if token.comments: 1328 chunks.append([token]) 1329 1330 if i < total - 1: 1331 chunks.append([]) 1332 else: 1333 chunks[-1].append(token) 1334 1335 expressions = [] 1336 1337 for tokens in chunks: 1338 self._index = -1 1339 self._tokens = tokens 1340 self._advance() 1341 1342 expressions.append(parse_method(self)) 1343 1344 if self._index < len(self._tokens): 1345 self.raise_error("Invalid expression / Unexpected token") 1346 1347 self.check_errors() 1348 1349 return expressions 1350 1351 def check_errors(self) -> None: 1352 """Logs or raises any found errors, depending on the chosen error level setting.""" 1353 if self.error_level == ErrorLevel.WARN: 1354 for error in self.errors: 1355 logger.error(str(error)) 1356 elif self.error_level == ErrorLevel.RAISE and self.errors: 1357 raise ParseError( 1358 concat_messages(self.errors, self.max_errors), 1359 errors=merge_errors(self.errors), 1360 ) 1361 1362 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1363 """ 1364 Appends an error in the list of recorded errors or raises it, depending on the chosen 1365 error level setting. 1366 """ 1367 token = token or self._curr or self._prev or Token.string("") 1368 start = token.start 1369 end = token.end + 1 1370 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1371 highlight = self.sql[start:end] 1372 end_context = self.sql[end : end + self.error_message_context] 1373 1374 error = ParseError.new( 1375 f"{message}. Line {token.line}, Col: {token.col}.\n" 1376 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1377 description=message, 1378 line=token.line, 1379 col=token.col, 1380 start_context=start_context, 1381 highlight=highlight, 1382 end_context=end_context, 1383 ) 1384 1385 if self.error_level == ErrorLevel.IMMEDIATE: 1386 raise error 1387 1388 self.errors.append(error) 1389 1390 def expression( 1391 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1392 ) -> E: 1393 """ 1394 Creates a new, validated Expression. 1395 1396 Args: 1397 exp_class: The expression class to instantiate. 1398 comments: An optional list of comments to attach to the expression. 1399 kwargs: The arguments to set for the expression along with their respective values. 1400 1401 Returns: 1402 The target expression. 
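        Example:
            A sketch that builds and validates a node directly; exp.column and
            exp.Literal.number are standard sqlglot helpers:

            >>> node = Parser().expression(
            ...     exp.EQ, this=exp.column("x"), expression=exp.Literal.number(1)
            ... )
            >>> node.sql()
            'x = 1'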
1403 """ 1404 instance = exp_class(**kwargs) 1405 instance.add_comments(comments) if comments else self._add_comments(instance) 1406 return self.validate_expression(instance) 1407 1408 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1409 if expression and self._prev_comments: 1410 expression.add_comments(self._prev_comments) 1411 self._prev_comments = None 1412 1413 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1414 """ 1415 Validates an Expression, making sure that all its mandatory arguments are set. 1416 1417 Args: 1418 expression: The expression to validate. 1419 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1420 1421 Returns: 1422 The validated expression. 1423 """ 1424 if self.error_level != ErrorLevel.IGNORE: 1425 for error_message in expression.error_messages(args): 1426 self.raise_error(error_message) 1427 1428 return expression 1429 1430 def _find_sql(self, start: Token, end: Token) -> str: 1431 return self.sql[start.start : end.end + 1] 1432 1433 def _is_connected(self) -> bool: 1434 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1435 1436 def _advance(self, times: int = 1) -> None: 1437 self._index += times 1438 self._curr = seq_get(self._tokens, self._index) 1439 self._next = seq_get(self._tokens, self._index + 1) 1440 1441 if self._index > 0: 1442 self._prev = self._tokens[self._index - 1] 1443 self._prev_comments = self._prev.comments 1444 else: 1445 self._prev = None 1446 self._prev_comments = None 1447 1448 def _retreat(self, index: int) -> None: 1449 if index != self._index: 1450 self._advance(index - self._index) 1451 1452 def _warn_unsupported(self) -> None: 1453 if len(self._tokens) <= 1: 1454 return 1455 1456 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1457 # interested in emitting a warning for the one being currently processed. 1458 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1459 1460 logger.warning( 1461 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1462 ) 1463 1464 def _parse_command(self) -> exp.Command: 1465 self._warn_unsupported() 1466 return self.expression( 1467 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1468 ) 1469 1470 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1471 """ 1472 Attempts to backtrack if a parse function that contains a try/except internally raises an error.
This behavior can 1473 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1474 the parser state accordingly. 1475 """ 1476 index = self._index 1477 error_level = self.error_level 1478 1479 self.error_level = ErrorLevel.IMMEDIATE 1480 try: 1481 this = parse_method() 1482 except ParseError: 1483 this = None 1484 finally: 1485 if not this or retreat: 1486 self._retreat(index) 1487 self.error_level = error_level 1488 1489 return this 1490 1491 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1492 start = self._prev 1493 exists = self._parse_exists() if allow_exists else None 1494 1495 self._match(TokenType.ON) 1496 1497 materialized = self._match_text_seq("MATERIALIZED") 1498 kind = self._match_set(self.CREATABLES) and self._prev 1499 if not kind: 1500 return self._parse_as_command(start) 1501 1502 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1503 this = self._parse_user_defined_function(kind=kind.token_type) 1504 elif kind.token_type == TokenType.TABLE: 1505 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1506 elif kind.token_type == TokenType.COLUMN: 1507 this = self._parse_column() 1508 else: 1509 this = self._parse_id_var() 1510 1511 self._match(TokenType.IS) 1512 1513 return self.expression( 1514 exp.Comment, 1515 this=this, 1516 kind=kind.text, 1517 expression=self._parse_string(), 1518 exists=exists, 1519 materialized=materialized, 1520 ) 1521 1522 def _parse_to_table( 1523 self, 1524 ) -> exp.ToTableProperty: 1525 table = self._parse_table_parts(schema=True) 1526 return self.expression(exp.ToTableProperty, this=table) 1527 1528 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1529 def _parse_ttl(self) -> exp.Expression: 1530 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1531 this = self._parse_bitwise() 1532 1533 if self._match_text_seq("DELETE"): 1534 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1535 if self._match_text_seq("RECOMPRESS"): 1536 return self.expression( 1537 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1538 ) 1539 if self._match_text_seq("TO", "DISK"): 1540 return self.expression( 1541 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1542 ) 1543 if self._match_text_seq("TO", "VOLUME"): 1544 return self.expression( 1545 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1546 ) 1547 1548 return this 1549 1550 expressions = self._parse_csv(_parse_ttl_action) 1551 where = self._parse_where() 1552 group = self._parse_group() 1553 1554 aggregates = None 1555 if group and self._match(TokenType.SET): 1556 aggregates = self._parse_csv(self._parse_set_item) 1557 1558 return self.expression( 1559 exp.MergeTreeTTL, 1560 expressions=expressions, 1561 where=where, 1562 group=group, 1563 aggregates=aggregates, 1564 ) 1565 1566 def _parse_statement(self) -> t.Optional[exp.Expression]: 1567 if self._curr is None: 1568 return None 1569 1570 if self._match_set(self.STATEMENT_PARSERS): 1571 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1572 1573 if self._match_set(self.dialect.tokenizer.COMMANDS): 1574 return self._parse_command() 1575 1576 expression = self._parse_expression() 1577 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1578 return self._parse_query_modifiers(expression) 1579 1580 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1581 start =
self._prev 1582 temporary = self._match(TokenType.TEMPORARY) 1583 materialized = self._match_text_seq("MATERIALIZED") 1584 1585 kind = self._match_set(self.CREATABLES) and self._prev.text 1586 if not kind: 1587 return self._parse_as_command(start) 1588 1589 if_exists = exists or self._parse_exists() 1590 table = self._parse_table_parts( 1591 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1592 ) 1593 1594 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1595 1596 if self._match(TokenType.L_PAREN, advance=False): 1597 expressions = self._parse_wrapped_csv(self._parse_types) 1598 else: 1599 expressions = None 1600 1601 return self.expression( 1602 exp.Drop, 1603 comments=start.comments, 1604 exists=if_exists, 1605 this=table, 1606 expressions=expressions, 1607 kind=kind.upper(), 1608 temporary=temporary, 1609 materialized=materialized, 1610 cascade=self._match_text_seq("CASCADE"), 1611 constraints=self._match_text_seq("CONSTRAINTS"), 1612 purge=self._match_text_seq("PURGE"), 1613 cluster=cluster, 1614 ) 1615 1616 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1617 return ( 1618 self._match_text_seq("IF") 1619 and (not not_ or self._match(TokenType.NOT)) 1620 and self._match(TokenType.EXISTS) 1621 ) 1622 1623 def _parse_create(self) -> exp.Create | exp.Command: 1624 # Note: this can't be None because we've matched a statement parser 1625 start = self._prev 1626 comments = self._prev_comments 1627 1628 replace = ( 1629 start.token_type == TokenType.REPLACE 1630 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1631 or self._match_pair(TokenType.OR, TokenType.ALTER) 1632 ) 1633 1634 unique = self._match(TokenType.UNIQUE) 1635 1636 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1637 self._advance() 1638 1639 properties = None 1640 create_token = self._match_set(self.CREATABLES) and self._prev 1641 1642 if not create_token: 1643 # exp.Properties.Location.POST_CREATE 1644 properties = self._parse_properties() 1645 create_token = self._match_set(self.CREATABLES) and self._prev 1646 1647 if not properties or not create_token: 1648 return self._parse_as_command(start) 1649 1650 exists = self._parse_exists(not_=True) 1651 this = None 1652 expression: t.Optional[exp.Expression] = None 1653 indexes = None 1654 no_schema_binding = None 1655 begin = None 1656 end = None 1657 clone = None 1658 1659 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1660 nonlocal properties 1661 if properties and temp_props: 1662 properties.expressions.extend(temp_props.expressions) 1663 elif temp_props: 1664 properties = temp_props 1665 1666 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1667 this = self._parse_user_defined_function(kind=create_token.token_type) 1668 1669 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1670 extend_props(self._parse_properties()) 1671 1672 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1673 extend_props(self._parse_properties()) 1674 1675 if not expression: 1676 if self._match(TokenType.COMMAND): 1677 expression = self._parse_as_command(self._prev) 1678 else: 1679 begin = self._match(TokenType.BEGIN) 1680 return_ = self._match_text_seq("RETURN") 1681 1682 if self._match(TokenType.STRING, advance=False): 1683 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1684 #
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1685 expression = self._parse_string() 1686 extend_props(self._parse_properties()) 1687 else: 1688 expression = self._parse_statement() 1689 1690 end = self._match_text_seq("END") 1691 1692 if return_: 1693 expression = self.expression(exp.Return, this=expression) 1694 elif create_token.token_type == TokenType.INDEX: 1695 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1696 if not self._match(TokenType.ON): 1697 index = self._parse_id_var() 1698 anonymous = False 1699 else: 1700 index = None 1701 anonymous = True 1702 1703 this = self._parse_index(index=index, anonymous=anonymous) 1704 elif create_token.token_type in self.DB_CREATABLES: 1705 table_parts = self._parse_table_parts( 1706 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1707 ) 1708 1709 # exp.Properties.Location.POST_NAME 1710 self._match(TokenType.COMMA) 1711 extend_props(self._parse_properties(before=True)) 1712 1713 this = self._parse_schema(this=table_parts) 1714 1715 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1716 extend_props(self._parse_properties()) 1717 1718 self._match(TokenType.ALIAS) 1719 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1720 # exp.Properties.Location.POST_ALIAS 1721 extend_props(self._parse_properties()) 1722 1723 if create_token.token_type == TokenType.SEQUENCE: 1724 expression = self._parse_types() 1725 extend_props(self._parse_properties()) 1726 else: 1727 expression = self._parse_ddl_select() 1728 1729 if create_token.token_type == TokenType.TABLE: 1730 # exp.Properties.Location.POST_EXPRESSION 1731 extend_props(self._parse_properties()) 1732 1733 indexes = [] 1734 while True: 1735 index = self._parse_index() 1736 1737 # exp.Properties.Location.POST_INDEX 1738 extend_props(self._parse_properties()) 1739 1740 if not index: 1741 break 1742 else: 1743 self._match(TokenType.COMMA) 1744 indexes.append(index) 1745 elif create_token.token_type == TokenType.VIEW: 1746 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1747 no_schema_binding = True 1748 1749 shallow = self._match_text_seq("SHALLOW") 1750 1751 if self._match_texts(self.CLONE_KEYWORDS): 1752 copy = self._prev.text.lower() == "copy" 1753 clone = self.expression( 1754 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1755 ) 1756 1757 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1758 return self._parse_as_command(start) 1759 1760 return self.expression( 1761 exp.Create, 1762 comments=comments, 1763 this=this, 1764 kind=create_token.text.upper(), 1765 replace=replace, 1766 unique=unique, 1767 expression=expression, 1768 exists=exists, 1769 properties=properties, 1770 indexes=indexes, 1771 no_schema_binding=no_schema_binding, 1772 begin=begin, 1773 end=end, 1774 clone=clone, 1775 ) 1776 1777 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1778 seq = exp.SequenceProperties() 1779 1780 options = [] 1781 index = self._index 1782 1783 while self._curr: 1784 self._match(TokenType.COMMA) 1785 if self._match_text_seq("INCREMENT"): 1786 self._match_text_seq("BY") 1787 self._match_text_seq("=") 1788 seq.set("increment", self._parse_term()) 1789 elif self._match_text_seq("MINVALUE"): 1790 seq.set("minvalue", self._parse_term()) 1791 elif self._match_text_seq("MAXVALUE"): 1792 seq.set("maxvalue", self._parse_term()) 1793 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1794 self._match_text_seq("=") 1795 seq.set("start", self._parse_term()) 1796 elif self._match_text_seq("CACHE"): 1797 # T-SQL allows empty CACHE which is initialized dynamically 1798 seq.set("cache", self._parse_number() or True) 1799 elif self._match_text_seq("OWNED", "BY"): 1800 # "OWNED BY NONE" is the default 1801 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1802 else: 1803 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1804 if opt: 1805 options.append(opt) 1806 else: 1807 break 1808 1809 seq.set("options", options if options else None) 1810 return None if self._index == index else seq 1811 1812 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1813 # only used for teradata currently 1814 self._match(TokenType.COMMA) 1815 1816 kwargs = { 1817 "no": self._match_text_seq("NO"), 1818 "dual": self._match_text_seq("DUAL"), 1819 "before": self._match_text_seq("BEFORE"), 1820 "default": self._match_text_seq("DEFAULT"), 1821 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1822 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1823 "after": self._match_text_seq("AFTER"), 1824 "minimum": self._match_texts(("MIN", "MINIMUM")), 1825 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1826 } 1827 1828 if self._match_texts(self.PROPERTY_PARSERS): 1829 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1830 try: 1831 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1832 except TypeError: 1833 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1834 1835 return None 1836 1837 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1838 return self._parse_wrapped_csv(self._parse_property) 1839 1840 def _parse_property(self) -> t.Optional[exp.Expression]: 1841 if self._match_texts(self.PROPERTY_PARSERS): 1842 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1843 1844 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1845 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1846 1847 if self._match_text_seq("COMPOUND", "SORTKEY"): 1848 return self._parse_sortkey(compound=True) 1849 1850 if self._match_text_seq("SQL", "SECURITY"): 1851 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1852 1853 index = self._index 1854 key = self._parse_column() 1855 1856 if not self._match(TokenType.EQ): 1857 self._retreat(index) 1858 return self._parse_sequence_properties() 1859 1860 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1861 if isinstance(key, exp.Column): 1862 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1863 1864 value = self._parse_bitwise() or self._parse_var(any_token=True) 1865 1866 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1867 if isinstance(value, exp.Column): 1868 value = exp.var(value.name) 1869 1870 return self.expression(exp.Property, this=key, value=value) 1871 1872 def _parse_stored(self) -> exp.FileFormatProperty: 1873 self._match(TokenType.ALIAS) 1874 1875 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1876 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1877 1878 return self.expression( 1879 exp.FileFormatProperty, 1880 this=( 1881 self.expression( 1882 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1883 ) 1884 if 
input_format or output_format 1885 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1886 ), 1887 ) 1888 1889 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1890 field = self._parse_field() 1891 if isinstance(field, exp.Identifier) and not field.quoted: 1892 field = exp.var(field) 1893 1894 return field 1895 1896 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1897 self._match(TokenType.EQ) 1898 self._match(TokenType.ALIAS) 1899 1900 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1901 1902 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1903 properties = [] 1904 while True: 1905 if before: 1906 prop = self._parse_property_before() 1907 else: 1908 prop = self._parse_property() 1909 if not prop: 1910 break 1911 for p in ensure_list(prop): 1912 properties.append(p) 1913 1914 if properties: 1915 return self.expression(exp.Properties, expressions=properties) 1916 1917 return None 1918 1919 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1920 return self.expression( 1921 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1922 ) 1923 1924 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1925 if self._index >= 2: 1926 pre_volatile_token = self._tokens[self._index - 2] 1927 else: 1928 pre_volatile_token = None 1929 1930 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1931 return exp.VolatileProperty() 1932 1933 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1934 1935 def _parse_retention_period(self) -> exp.Var: 1936 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1937 number = self._parse_number() 1938 number_str = f"{number} " if number else "" 1939 unit = self._parse_var(any_token=True) 1940 return exp.var(f"{number_str}{unit}") 1941 1942 def _parse_system_versioning_property( 1943 self, with_: bool = False 1944 ) -> exp.WithSystemVersioningProperty: 1945 self._match(TokenType.EQ) 1946 prop = self.expression( 1947 exp.WithSystemVersioningProperty, 1948 **{ # type: ignore 1949 "on": True, 1950 "with": with_, 1951 }, 1952 ) 1953 1954 if self._match_text_seq("OFF"): 1955 prop.set("on", False) 1956 return prop 1957 1958 self._match(TokenType.ON) 1959 if self._match(TokenType.L_PAREN): 1960 while self._curr and not self._match(TokenType.R_PAREN): 1961 if self._match_text_seq("HISTORY_TABLE", "="): 1962 prop.set("this", self._parse_table_parts()) 1963 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1964 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1965 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1966 prop.set("retention_period", self._parse_retention_period()) 1967 1968 self._match(TokenType.COMMA) 1969 1970 return prop 1971 1972 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1973 self._match(TokenType.EQ) 1974 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1975 prop = self.expression(exp.DataDeletionProperty, on=on) 1976 1977 if self._match(TokenType.L_PAREN): 1978 while self._curr and not self._match(TokenType.R_PAREN): 1979 if self._match_text_seq("FILTER_COLUMN", "="): 1980 prop.set("filter_column", self._parse_column()) 1981 elif self._match_text_seq("RETENTION_PERIOD", "="): 1982 prop.set("retention_period", self._parse_retention_period()) 1983 1984 
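# Illustrative note (added commentary, not from the source): the surrounding
# loop consumes comma-separated options until the closing paren, so a clause
# such as DATA_DELETION = ON (FILTER_COLUMN = created_at, RETENTION_PERIOD = 30 DAYS)
# would yield a DataDeletionProperty with on=True, a filter_column and a
# retention_period; the column name and period used here are hypothetical.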
self._match(TokenType.COMMA) 1985 1986 return prop 1987 1988 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1989 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1990 prop = self._parse_system_versioning_property(with_=True) 1991 self._match_r_paren() 1992 return prop 1993 1994 if self._match(TokenType.L_PAREN, advance=False): 1995 return self._parse_wrapped_properties() 1996 1997 if self._match_text_seq("JOURNAL"): 1998 return self._parse_withjournaltable() 1999 2000 if self._match_texts(self.VIEW_ATTRIBUTES): 2001 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2002 2003 if self._match_text_seq("DATA"): 2004 return self._parse_withdata(no=False) 2005 elif self._match_text_seq("NO", "DATA"): 2006 return self._parse_withdata(no=True) 2007 2008 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2009 return self._parse_serde_properties(with_=True) 2010 2011 if not self._next: 2012 return None 2013 2014 return self._parse_withisolatedloading() 2015 2016 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2017 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2018 self._match(TokenType.EQ) 2019 2020 user = self._parse_id_var() 2021 self._match(TokenType.PARAMETER) 2022 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2023 2024 if not user or not host: 2025 return None 2026 2027 return exp.DefinerProperty(this=f"{user}@{host}") 2028 2029 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2030 self._match(TokenType.TABLE) 2031 self._match(TokenType.EQ) 2032 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2033 2034 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2035 return self.expression(exp.LogProperty, no=no) 2036 2037 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2038 return self.expression(exp.JournalProperty, **kwargs) 2039 2040 def _parse_checksum(self) -> exp.ChecksumProperty: 2041 self._match(TokenType.EQ) 2042 2043 on = None 2044 if self._match(TokenType.ON): 2045 on = True 2046 elif self._match_text_seq("OFF"): 2047 on = False 2048 2049 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2050 2051 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2052 return self.expression( 2053 exp.Cluster, 2054 expressions=( 2055 self._parse_wrapped_csv(self._parse_ordered) 2056 if wrapped 2057 else self._parse_csv(self._parse_ordered) 2058 ), 2059 ) 2060 2061 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2062 self._match_text_seq("BY") 2063 2064 self._match_l_paren() 2065 expressions = self._parse_csv(self._parse_column) 2066 self._match_r_paren() 2067 2068 if self._match_text_seq("SORTED", "BY"): 2069 self._match_l_paren() 2070 sorted_by = self._parse_csv(self._parse_ordered) 2071 self._match_r_paren() 2072 else: 2073 sorted_by = None 2074 2075 self._match(TokenType.INTO) 2076 buckets = self._parse_number() 2077 self._match_text_seq("BUCKETS") 2078 2079 return self.expression( 2080 exp.ClusteredByProperty, 2081 expressions=expressions, 2082 sorted_by=sorted_by, 2083 buckets=buckets, 2084 ) 2085 2086 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2087 if not self._match_text_seq("GRANTS"): 2088 self._retreat(self._index - 1) 2089 return None 2090 2091 return self.expression(exp.CopyGrantsProperty) 2092 2093 def _parse_freespace(self) -> exp.FreespaceProperty: 2094 self._match(TokenType.EQ) 2095 return 
self.expression( 2096 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2097 ) 2098 2099 def _parse_mergeblockratio( 2100 self, no: bool = False, default: bool = False 2101 ) -> exp.MergeBlockRatioProperty: 2102 if self._match(TokenType.EQ): 2103 return self.expression( 2104 exp.MergeBlockRatioProperty, 2105 this=self._parse_number(), 2106 percent=self._match(TokenType.PERCENT), 2107 ) 2108 2109 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2110 2111 def _parse_datablocksize( 2112 self, 2113 default: t.Optional[bool] = None, 2114 minimum: t.Optional[bool] = None, 2115 maximum: t.Optional[bool] = None, 2116 ) -> exp.DataBlocksizeProperty: 2117 self._match(TokenType.EQ) 2118 size = self._parse_number() 2119 2120 units = None 2121 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2122 units = self._prev.text 2123 2124 return self.expression( 2125 exp.DataBlocksizeProperty, 2126 size=size, 2127 units=units, 2128 default=default, 2129 minimum=minimum, 2130 maximum=maximum, 2131 ) 2132 2133 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2134 self._match(TokenType.EQ) 2135 always = self._match_text_seq("ALWAYS") 2136 manual = self._match_text_seq("MANUAL") 2137 never = self._match_text_seq("NEVER") 2138 default = self._match_text_seq("DEFAULT") 2139 2140 autotemp = None 2141 if self._match_text_seq("AUTOTEMP"): 2142 autotemp = self._parse_schema() 2143 2144 return self.expression( 2145 exp.BlockCompressionProperty, 2146 always=always, 2147 manual=manual, 2148 never=never, 2149 default=default, 2150 autotemp=autotemp, 2151 ) 2152 2153 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2154 index = self._index 2155 no = self._match_text_seq("NO") 2156 concurrent = self._match_text_seq("CONCURRENT") 2157 2158 if not self._match_text_seq("ISOLATED", "LOADING"): 2159 self._retreat(index) 2160 return None 2161 2162 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2163 return self.expression( 2164 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2165 ) 2166 2167 def _parse_locking(self) -> exp.LockingProperty: 2168 if self._match(TokenType.TABLE): 2169 kind = "TABLE" 2170 elif self._match(TokenType.VIEW): 2171 kind = "VIEW" 2172 elif self._match(TokenType.ROW): 2173 kind = "ROW" 2174 elif self._match_text_seq("DATABASE"): 2175 kind = "DATABASE" 2176 else: 2177 kind = None 2178 2179 if kind in ("DATABASE", "TABLE", "VIEW"): 2180 this = self._parse_table_parts() 2181 else: 2182 this = None 2183 2184 if self._match(TokenType.FOR): 2185 for_or_in = "FOR" 2186 elif self._match(TokenType.IN): 2187 for_or_in = "IN" 2188 else: 2189 for_or_in = None 2190 2191 if self._match_text_seq("ACCESS"): 2192 lock_type = "ACCESS" 2193 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2194 lock_type = "EXCLUSIVE" 2195 elif self._match_text_seq("SHARE"): 2196 lock_type = "SHARE" 2197 elif self._match_text_seq("READ"): 2198 lock_type = "READ" 2199 elif self._match_text_seq("WRITE"): 2200 lock_type = "WRITE" 2201 elif self._match_text_seq("CHECKSUM"): 2202 lock_type = "CHECKSUM" 2203 else: 2204 lock_type = None 2205 2206 override = self._match_text_seq("OVERRIDE") 2207 2208 return self.expression( 2209 exp.LockingProperty, 2210 this=this, 2211 kind=kind, 2212 for_or_in=for_or_in, 2213 lock_type=lock_type, 2214 override=override, 2215 ) 2216 2217 def _parse_partition_by(self) -> t.List[exp.Expression]: 2218 if 
self._match(TokenType.PARTITION_BY): 2219 return self._parse_csv(self._parse_assignment) 2220 return [] 2221 2222 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2223 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2224 if self._match_text_seq("MINVALUE"): 2225 return exp.var("MINVALUE") 2226 if self._match_text_seq("MAXVALUE"): 2227 return exp.var("MAXVALUE") 2228 return self._parse_bitwise() 2229 2230 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2231 expression = None 2232 from_expressions = None 2233 to_expressions = None 2234 2235 if self._match(TokenType.IN): 2236 this = self._parse_wrapped_csv(self._parse_bitwise) 2237 elif self._match(TokenType.FROM): 2238 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2239 self._match_text_seq("TO") 2240 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2241 elif self._match_text_seq("WITH", "(", "MODULUS"): 2242 this = self._parse_number() 2243 self._match_text_seq(",", "REMAINDER") 2244 expression = self._parse_number() 2245 self._match_r_paren() 2246 else: 2247 self.raise_error("Failed to parse partition bound spec.") 2248 2249 return self.expression( 2250 exp.PartitionBoundSpec, 2251 this=this, 2252 expression=expression, 2253 from_expressions=from_expressions, 2254 to_expressions=to_expressions, 2255 ) 2256 2257 # https://www.postgresql.org/docs/current/sql-createtable.html 2258 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2259 if not self._match_text_seq("OF"): 2260 self._retreat(self._index - 1) 2261 return None 2262 2263 this = self._parse_table(schema=True) 2264 2265 if self._match(TokenType.DEFAULT): 2266 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2267 elif self._match_text_seq("FOR", "VALUES"): 2268 expression = self._parse_partition_bound_spec() 2269 else: 2270 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2271 2272 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2273 2274 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2275 self._match(TokenType.EQ) 2276 return self.expression( 2277 exp.PartitionedByProperty, 2278 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2279 ) 2280 2281 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2282 if self._match_text_seq("AND", "STATISTICS"): 2283 statistics = True 2284 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2285 statistics = False 2286 else: 2287 statistics = None 2288 2289 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2290 2291 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2292 if self._match_text_seq("SQL"): 2293 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2294 return None 2295 2296 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2297 if self._match_text_seq("SQL", "DATA"): 2298 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2299 return None 2300 2301 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2302 if self._match_text_seq("PRIMARY", "INDEX"): 2303 return exp.NoPrimaryIndexProperty() 2304 if self._match_text_seq("SQL"): 2305 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2306 return None 2307 2308 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2309 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2310 return exp.OnCommitProperty() 2311 
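# Illustrative sketch (added commentary, not from the source): the branches
# of _parse_on_property map ON COMMIT clauses to AST nodes, e.g.
#   ON COMMIT PRESERVE ROWS -> exp.OnCommitProperty()
#   ON COMMIT DELETE ROWS   -> exp.OnCommitProperty(delete=True)
# and any other ON <name> falls through to exp.OnProperty below.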
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2312 return exp.OnCommitProperty(delete=True) 2313 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2314 2315 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2316 if self._match_text_seq("SQL", "DATA"): 2317 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2318 return None 2319 2320 def _parse_distkey(self) -> exp.DistKeyProperty: 2321 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2322 2323 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2324 table = self._parse_table(schema=True) 2325 2326 options = [] 2327 while self._match_texts(("INCLUDING", "EXCLUDING")): 2328 this = self._prev.text.upper() 2329 2330 id_var = self._parse_id_var() 2331 if not id_var: 2332 return None 2333 2334 options.append( 2335 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2336 ) 2337 2338 return self.expression(exp.LikeProperty, this=table, expressions=options) 2339 2340 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2341 return self.expression( 2342 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2343 ) 2344 2345 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2346 self._match(TokenType.EQ) 2347 return self.expression( 2348 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2349 ) 2350 2351 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2352 self._match_text_seq("WITH", "CONNECTION") 2353 return self.expression( 2354 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2355 ) 2356 2357 def _parse_returns(self) -> exp.ReturnsProperty: 2358 value: t.Optional[exp.Expression] 2359 null = None 2360 is_table = self._match(TokenType.TABLE) 2361 2362 if is_table: 2363 if self._match(TokenType.LT): 2364 value = self.expression( 2365 exp.Schema, 2366 this="TABLE", 2367 expressions=self._parse_csv(self._parse_struct_types), 2368 ) 2369 if not self._match(TokenType.GT): 2370 self.raise_error("Expecting >") 2371 else: 2372 value = self._parse_schema(exp.var("TABLE")) 2373 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2374 null = True 2375 value = None 2376 else: 2377 value = self._parse_types() 2378 2379 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2380 2381 def _parse_describe(self) -> exp.Describe: 2382 kind = self._match_set(self.CREATABLES) and self._prev.text 2383 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2384 if self._match(TokenType.DOT): 2385 style = None 2386 self._retreat(self._index - 2) 2387 this = self._parse_table(schema=True) 2388 properties = self._parse_properties() 2389 expressions = properties.expressions if properties else None 2390 return self.expression( 2391 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2392 ) 2393 2394 def _parse_insert(self) -> exp.Insert: 2395 comments = ensure_list(self._prev_comments) 2396 hint = self._parse_hint() 2397 overwrite = self._match(TokenType.OVERWRITE) 2398 ignore = self._match(TokenType.IGNORE) 2399 local = self._match_text_seq("LOCAL") 2400 alternative = None 2401 is_function = None 2402 2403 if self._match_text_seq("DIRECTORY"): 2404 this: t.Optional[exp.Expression] = self.expression( 2405 exp.Directory, 2406 this=self._parse_var_or_string(), 2407 
local=local, 2408 row_format=self._parse_row_format(match_row=True), 2409 ) 2410 else: 2411 if self._match(TokenType.OR): 2412 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2413 2414 self._match(TokenType.INTO) 2415 comments += ensure_list(self._prev_comments) 2416 self._match(TokenType.TABLE) 2417 is_function = self._match(TokenType.FUNCTION) 2418 2419 this = ( 2420 self._parse_table(schema=True, parse_partition=True) 2421 if not is_function 2422 else self._parse_function() 2423 ) 2424 2425 returning = self._parse_returning() 2426 2427 return self.expression( 2428 exp.Insert, 2429 comments=comments, 2430 hint=hint, 2431 is_function=is_function, 2432 this=this, 2433 stored=self._match_text_seq("STORED") and self._parse_stored(), 2434 by_name=self._match_text_seq("BY", "NAME"), 2435 exists=self._parse_exists(), 2436 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2437 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2438 conflict=self._parse_on_conflict(), 2439 returning=returning or self._parse_returning(), 2440 overwrite=overwrite, 2441 alternative=alternative, 2442 ignore=ignore, 2443 ) 2444 2445 def _parse_kill(self) -> exp.Kill: 2446 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2447 2448 return self.expression( 2449 exp.Kill, 2450 this=self._parse_primary(), 2451 kind=kind, 2452 ) 2453 2454 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2455 conflict = self._match_text_seq("ON", "CONFLICT") 2456 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2457 2458 if not conflict and not duplicate: 2459 return None 2460 2461 conflict_keys = None 2462 constraint = None 2463 2464 if conflict: 2465 if self._match_text_seq("ON", "CONSTRAINT"): 2466 constraint = self._parse_id_var() 2467 elif self._match(TokenType.L_PAREN): 2468 conflict_keys = self._parse_csv(self._parse_id_var) 2469 self._match_r_paren() 2470 2471 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2472 if self._prev.token_type == TokenType.UPDATE: 2473 self._match(TokenType.SET) 2474 expressions = self._parse_csv(self._parse_equality) 2475 else: 2476 expressions = None 2477 2478 return self.expression( 2479 exp.OnConflict, 2480 duplicate=duplicate, 2481 expressions=expressions, 2482 action=action, 2483 conflict_keys=conflict_keys, 2484 constraint=constraint, 2485 ) 2486 2487 def _parse_returning(self) -> t.Optional[exp.Returning]: 2488 if not self._match(TokenType.RETURNING): 2489 return None 2490 return self.expression( 2491 exp.Returning, 2492 expressions=self._parse_csv(self._parse_expression), 2493 into=self._match(TokenType.INTO) and self._parse_table_part(), 2494 ) 2495 2496 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2497 if not self._match(TokenType.FORMAT): 2498 return None 2499 return self._parse_row_format() 2500 2501 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2502 index = self._index 2503 with_ = with_ or self._match_text_seq("WITH") 2504 2505 if not self._match(TokenType.SERDE_PROPERTIES): 2506 self._retreat(index) 2507 return None 2508 return self.expression( 2509 exp.SerdeProperties, 2510 **{ # type: ignore 2511 "expressions": self._parse_wrapped_properties(), 2512 "with": with_, 2513 }, 2514 ) 2515 2516 def _parse_row_format( 2517 self, match_row: bool = False 2518 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2519 
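# Illustrative examples (added commentary, not from the source) of the
# Hive-style inputs _parse_row_format accepts; the serde class and the
# delimiter values shown are hypothetical:
#   ROW FORMAT SERDE 'com.example.MySerde'        -> RowFormatSerdeProperty
#   ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
#              LINES TERMINATED BY '\n'           -> RowFormatDelimitedProperty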
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2520 return None 2521 2522 if self._match_text_seq("SERDE"): 2523 this = self._parse_string() 2524 2525 serde_properties = self._parse_serde_properties() 2526 2527 return self.expression( 2528 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2529 ) 2530 2531 self._match_text_seq("DELIMITED") 2532 2533 kwargs = {} 2534 2535 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2536 kwargs["fields"] = self._parse_string() 2537 if self._match_text_seq("ESCAPED", "BY"): 2538 kwargs["escaped"] = self._parse_string() 2539 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2540 kwargs["collection_items"] = self._parse_string() 2541 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2542 kwargs["map_keys"] = self._parse_string() 2543 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2544 kwargs["lines"] = self._parse_string() 2545 if self._match_text_seq("NULL", "DEFINED", "AS"): 2546 kwargs["null"] = self._parse_string() 2547 2548 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2549 2550 def _parse_load(self) -> exp.LoadData | exp.Command: 2551 if self._match_text_seq("DATA"): 2552 local = self._match_text_seq("LOCAL") 2553 self._match_text_seq("INPATH") 2554 inpath = self._parse_string() 2555 overwrite = self._match(TokenType.OVERWRITE) 2556 self._match_pair(TokenType.INTO, TokenType.TABLE) 2557 2558 return self.expression( 2559 exp.LoadData, 2560 this=self._parse_table(schema=True), 2561 local=local, 2562 overwrite=overwrite, 2563 inpath=inpath, 2564 partition=self._parse_partition(), 2565 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2566 serde=self._match_text_seq("SERDE") and self._parse_string(), 2567 ) 2568 return self._parse_as_command(self._prev) 2569 2570 def _parse_delete(self) -> exp.Delete: 2571 # This handles MySQL's "Multiple-Table Syntax" 2572 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2573 tables = None 2574 comments = self._prev_comments 2575 if not self._match(TokenType.FROM, advance=False): 2576 tables = self._parse_csv(self._parse_table) or None 2577 2578 returning = self._parse_returning() 2579 2580 return self.expression( 2581 exp.Delete, 2582 comments=comments, 2583 tables=tables, 2584 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2585 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2586 where=self._parse_where(), 2587 returning=returning or self._parse_returning(), 2588 limit=self._parse_limit(), 2589 ) 2590 2591 def _parse_update(self) -> exp.Update: 2592 comments = self._prev_comments 2593 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2594 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2595 returning = self._parse_returning() 2596 return self.expression( 2597 exp.Update, 2598 comments=comments, 2599 **{ # type: ignore 2600 "this": this, 2601 "expressions": expressions, 2602 "from": self._parse_from(joins=True), 2603 "where": self._parse_where(), 2604 "returning": returning or self._parse_returning(), 2605 "order": self._parse_order(), 2606 "limit": self._parse_limit(), 2607 }, 2608 ) 2609 2610 def _parse_uncache(self) -> exp.Uncache: 2611 if not self._match(TokenType.TABLE): 2612 self.raise_error("Expecting TABLE after UNCACHE") 2613 2614 return self.expression( 2615 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2616 ) 2617 2618 def 
_parse_cache(self) -> exp.Cache: 2619 lazy = self._match_text_seq("LAZY") 2620 self._match(TokenType.TABLE) 2621 table = self._parse_table(schema=True) 2622 2623 options = [] 2624 if self._match_text_seq("OPTIONS"): 2625 self._match_l_paren() 2626 k = self._parse_string() 2627 self._match(TokenType.EQ) 2628 v = self._parse_string() 2629 options = [k, v] 2630 self._match_r_paren() 2631 2632 self._match(TokenType.ALIAS) 2633 return self.expression( 2634 exp.Cache, 2635 this=table, 2636 lazy=lazy, 2637 options=options, 2638 expression=self._parse_select(nested=True), 2639 ) 2640 2641 def _parse_partition(self) -> t.Optional[exp.Partition]: 2642 if not self._match(TokenType.PARTITION): 2643 return None 2644 2645 return self.expression( 2646 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2647 ) 2648 2649 def _parse_value(self) -> t.Optional[exp.Tuple]: 2650 if self._match(TokenType.L_PAREN): 2651 expressions = self._parse_csv(self._parse_expression) 2652 self._match_r_paren() 2653 return self.expression(exp.Tuple, expressions=expressions) 2654 2655 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2656 expression = self._parse_expression() 2657 if expression: 2658 return self.expression(exp.Tuple, expressions=[expression]) 2659 return None 2660 2661 def _parse_projections(self) -> t.List[exp.Expression]: 2662 return self._parse_expressions() 2663 2664 def _parse_select( 2665 self, 2666 nested: bool = False, 2667 table: bool = False, 2668 parse_subquery_alias: bool = True, 2669 parse_set_operation: bool = True, 2670 ) -> t.Optional[exp.Expression]: 2671 cte = self._parse_with() 2672 2673 if cte: 2674 this = self._parse_statement() 2675 2676 if not this: 2677 self.raise_error("Failed to parse any statement following CTE") 2678 return cte 2679 2680 if "with" in this.arg_types: 2681 this.set("with", cte) 2682 else: 2683 self.raise_error(f"{this.key} does not support CTE") 2684 this = cte 2685 2686 return this 2687 2688 # duckdb supports leading with FROM x 2689 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2690 2691 if self._match(TokenType.SELECT): 2692 comments = self._prev_comments 2693 2694 hint = self._parse_hint() 2695 all_ = self._match(TokenType.ALL) 2696 distinct = self._match_set(self.DISTINCT_TOKENS) 2697 2698 kind = ( 2699 self._match(TokenType.ALIAS) 2700 and self._match_texts(("STRUCT", "VALUE")) 2701 and self._prev.text.upper() 2702 ) 2703 2704 if distinct: 2705 distinct = self.expression( 2706 exp.Distinct, 2707 on=self._parse_value() if self._match(TokenType.ON) else None, 2708 ) 2709 2710 if all_ and distinct: 2711 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2712 2713 limit = self._parse_limit(top=True) 2714 projections = self._parse_projections() 2715 2716 this = self.expression( 2717 exp.Select, 2718 kind=kind, 2719 hint=hint, 2720 distinct=distinct, 2721 expressions=projections, 2722 limit=limit, 2723 ) 2724 this.comments = comments 2725 2726 into = self._parse_into() 2727 if into: 2728 this.set("into", into) 2729 2730 if not from_: 2731 from_ = self._parse_from() 2732 2733 if from_: 2734 this.set("from", from_) 2735 2736 this = self._parse_query_modifiers(this) 2737 elif (table or nested) and self._match(TokenType.L_PAREN): 2738 if self._match(TokenType.PIVOT): 2739 this = self._parse_simplified_pivot() 2740 elif self._match(TokenType.FROM): 2741 this = exp.select("*").from_( 2742 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2743 ) 2744 else: 2745 
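# Commentary (not from the source): with nested=True, an input such as
# "(SELECT 1 UNION SELECT 2)" reaches this branch; the set operation is
# parsed inside the parens and the early return below wraps the result in
# an exp.Subquery so that a trailing UNION binds to the subquery's parent.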
this = ( 2746 self._parse_table() 2747 if table 2748 else self._parse_select(nested=True, parse_set_operation=False) 2749 ) 2750 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2751 2752 self._match_r_paren() 2753 2754 # We return early here so that the UNION isn't attached to the subquery by the 2755 # following call to _parse_set_operations, but instead becomes the parent node 2756 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2757 elif self._match(TokenType.VALUES, advance=False): 2758 this = self._parse_derived_table_values() 2759 elif from_: 2760 this = exp.select("*").from_(from_.this, copy=False) 2761 else: 2762 this = None 2763 2764 if parse_set_operation: 2765 return self._parse_set_operations(this) 2766 return this 2767 2768 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2769 if not skip_with_token and not self._match(TokenType.WITH): 2770 return None 2771 2772 comments = self._prev_comments 2773 recursive = self._match(TokenType.RECURSIVE) 2774 2775 expressions = [] 2776 while True: 2777 expressions.append(self._parse_cte()) 2778 2779 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2780 break 2781 else: 2782 self._match(TokenType.WITH) 2783 2784 return self.expression( 2785 exp.With, comments=comments, expressions=expressions, recursive=recursive 2786 ) 2787 2788 def _parse_cte(self) -> exp.CTE: 2789 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2790 if not alias or not alias.this: 2791 self.raise_error("Expected CTE to have alias") 2792 2793 self._match(TokenType.ALIAS) 2794 2795 if self._match_text_seq("NOT", "MATERIALIZED"): 2796 materialized = False 2797 elif self._match_text_seq("MATERIALIZED"): 2798 materialized = True 2799 else: 2800 materialized = None 2801 2802 return self.expression( 2803 exp.CTE, 2804 this=self._parse_wrapped(self._parse_statement), 2805 alias=alias, 2806 materialized=materialized, 2807 ) 2808 2809 def _parse_table_alias( 2810 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2811 ) -> t.Optional[exp.TableAlias]: 2812 any_token = self._match(TokenType.ALIAS) 2813 alias = ( 2814 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2815 or self._parse_string_as_identifier() 2816 ) 2817 2818 index = self._index 2819 if self._match(TokenType.L_PAREN): 2820 columns = self._parse_csv(self._parse_function_parameter) 2821 self._match_r_paren() if columns else self._retreat(index) 2822 else: 2823 columns = None 2824 2825 if not alias and not columns: 2826 return None 2827 2828 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2829 2830 # We bubble up comments from the Identifier to the TableAlias 2831 if isinstance(alias, exp.Identifier): 2832 table_alias.add_comments(alias.pop_comments()) 2833 2834 return table_alias 2835 2836 def _parse_subquery( 2837 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2838 ) -> t.Optional[exp.Subquery]: 2839 if not this: 2840 return None 2841 2842 return self.expression( 2843 exp.Subquery, 2844 this=this, 2845 pivots=self._parse_pivots(), 2846 alias=self._parse_table_alias() if parse_alias else None, 2847 ) 2848 2849 def _implicit_unnests_to_explicit(self, this: E) -> E: 2850 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2851 2852 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2853 for i, join in enumerate(this.args.get("joins") or []): 2854 table = join.this 2855 
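# Commentary (not from the source): for dialects with SUPPORTS_IMPLICIT_UNNEST
# (BigQuery-style), a comma join whose "table" is really a column reference
# into a previously seen table, e.g. SELECT * FROM t, t.arr, is rewritten
# below into an explicit SELECT * FROM t, UNNEST(t.arr) by replacing the
# Table node with an exp.Unnest.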
normalized_table = table.copy() 2856 normalized_table.meta["maybe_column"] = True 2857 normalized_table = _norm(normalized_table, dialect=self.dialect) 2858 2859 if isinstance(table, exp.Table) and not join.args.get("on"): 2860 if normalized_table.parts[0].name in refs: 2861 table_as_column = table.to_column() 2862 unnest = exp.Unnest(expressions=[table_as_column]) 2863 2864 # Table.to_column creates a parent Alias node that we want to convert to 2865 # a TableAlias and attach to the Unnest, so it matches the parser's output 2866 if isinstance(table.args.get("alias"), exp.TableAlias): 2867 table_as_column.replace(table_as_column.this) 2868 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2869 2870 table.replace(unnest) 2871 2872 refs.add(normalized_table.alias_or_name) 2873 2874 return this 2875 2876 def _parse_query_modifiers( 2877 self, this: t.Optional[exp.Expression] 2878 ) -> t.Optional[exp.Expression]: 2879 if isinstance(this, (exp.Query, exp.Table)): 2880 for join in self._parse_joins(): 2881 this.append("joins", join) 2882 for lateral in iter(self._parse_lateral, None): 2883 this.append("laterals", lateral) 2884 2885 while True: 2886 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2887 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2888 key, expression = parser(self) 2889 2890 if expression: 2891 this.set(key, expression) 2892 if key == "limit": 2893 offset = expression.args.pop("offset", None) 2894 2895 if offset: 2896 offset = exp.Offset(expression=offset) 2897 this.set("offset", offset) 2898 2899 limit_by_expressions = expression.expressions 2900 expression.set("expressions", None) 2901 offset.set("expressions", limit_by_expressions) 2902 continue 2903 break 2904 2905 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2906 this = self._implicit_unnests_to_explicit(this) 2907 2908 return this 2909 2910 def _parse_hint(self) -> t.Optional[exp.Hint]: 2911 if self._match(TokenType.HINT): 2912 hints = [] 2913 for hint in iter( 2914 lambda: self._parse_csv( 2915 lambda: self._parse_function() or self._parse_var(upper=True) 2916 ), 2917 [], 2918 ): 2919 hints.extend(hint) 2920 2921 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2922 self.raise_error("Expected */ after HINT") 2923 2924 return self.expression(exp.Hint, expressions=hints) 2925 2926 return None 2927 2928 def _parse_into(self) -> t.Optional[exp.Into]: 2929 if not self._match(TokenType.INTO): 2930 return None 2931 2932 temp = self._match(TokenType.TEMPORARY) 2933 unlogged = self._match_text_seq("UNLOGGED") 2934 self._match(TokenType.TABLE) 2935 2936 return self.expression( 2937 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2938 ) 2939 2940 def _parse_from( 2941 self, joins: bool = False, skip_from_token: bool = False 2942 ) -> t.Optional[exp.From]: 2943 if not skip_from_token and not self._match(TokenType.FROM): 2944 return None 2945 2946 return self.expression( 2947 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2948 ) 2949 2950 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2951 return self.expression( 2952 exp.MatchRecognizeMeasure, 2953 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2954 this=self._parse_expression(), 2955 ) 2956 2957 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2958 if not self._match(TokenType.MATCH_RECOGNIZE): 2959 return None 2960 2961 self._match_l_paren() 2962 2963 partition = 
self._parse_partition_by() 2964 order = self._parse_order() 2965 2966 measures = ( 2967 self._parse_csv(self._parse_match_recognize_measure) 2968 if self._match_text_seq("MEASURES") 2969 else None 2970 ) 2971 2972 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2973 rows = exp.var("ONE ROW PER MATCH") 2974 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2975 text = "ALL ROWS PER MATCH" 2976 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2977 text += " SHOW EMPTY MATCHES" 2978 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2979 text += " OMIT EMPTY MATCHES" 2980 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2981 text += " WITH UNMATCHED ROWS" 2982 rows = exp.var(text) 2983 else: 2984 rows = None 2985 2986 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2987 text = "AFTER MATCH SKIP" 2988 if self._match_text_seq("PAST", "LAST", "ROW"): 2989 text += " PAST LAST ROW" 2990 elif self._match_text_seq("TO", "NEXT", "ROW"): 2991 text += " TO NEXT ROW" 2992 elif self._match_text_seq("TO", "FIRST"): 2993 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2994 elif self._match_text_seq("TO", "LAST"): 2995 text += f" TO LAST {self._advance_any().text}" # type: ignore 2996 after = exp.var(text) 2997 else: 2998 after = None 2999 3000 if self._match_text_seq("PATTERN"): 3001 self._match_l_paren() 3002 3003 if not self._curr: 3004 self.raise_error("Expecting )", self._curr) 3005 3006 paren = 1 3007 start = self._curr 3008 3009 while self._curr and paren > 0: 3010 if self._curr.token_type == TokenType.L_PAREN: 3011 paren += 1 3012 if self._curr.token_type == TokenType.R_PAREN: 3013 paren -= 1 3014 3015 end = self._prev 3016 self._advance() 3017 3018 if paren > 0: 3019 self.raise_error("Expecting )", self._curr) 3020 3021 pattern = exp.var(self._find_sql(start, end)) 3022 else: 3023 pattern = None 3024 3025 define = ( 3026 self._parse_csv(self._parse_name_as_expression) 3027 if self._match_text_seq("DEFINE") 3028 else None 3029 ) 3030 3031 self._match_r_paren() 3032 3033 return self.expression( 3034 exp.MatchRecognize, 3035 partition_by=partition, 3036 order=order, 3037 measures=measures, 3038 rows=rows, 3039 after=after, 3040 pattern=pattern, 3041 define=define, 3042 alias=self._parse_table_alias(), 3043 ) 3044 3045 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3046 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3047 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3048 cross_apply = False 3049 3050 if cross_apply is not None: 3051 this = self._parse_select(table=True) 3052 view = None 3053 outer = None 3054 elif self._match(TokenType.LATERAL): 3055 this = self._parse_select(table=True) 3056 view = self._match(TokenType.VIEW) 3057 outer = self._match(TokenType.OUTER) 3058 else: 3059 return None 3060 3061 if not this: 3062 this = ( 3063 self._parse_unnest() 3064 or self._parse_function() 3065 or self._parse_id_var(any_token=False) 3066 ) 3067 3068 while self._match(TokenType.DOT): 3069 this = exp.Dot( 3070 this=this, 3071 expression=self._parse_function() or self._parse_id_var(any_token=False), 3072 ) 3073 3074 if view: 3075 table = self._parse_id_var(any_token=False) 3076 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3077 table_alias: t.Optional[exp.TableAlias] = self.expression( 3078 exp.TableAlias, this=table, columns=columns 3079 ) 3080 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3081 # We move the alias from the lateral's child node to 
the lateral itself 3082 table_alias = this.args["alias"].pop() 3083 else: 3084 table_alias = self._parse_table_alias() 3085 3086 return self.expression( 3087 exp.Lateral, 3088 this=this, 3089 view=view, 3090 outer=outer, 3091 alias=table_alias, 3092 cross_apply=cross_apply, 3093 ) 3094 3095 def _parse_join_parts( 3096 self, 3097 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3098 return ( 3099 self._match_set(self.JOIN_METHODS) and self._prev, 3100 self._match_set(self.JOIN_SIDES) and self._prev, 3101 self._match_set(self.JOIN_KINDS) and self._prev, 3102 ) 3103 3104 def _parse_join( 3105 self, skip_join_token: bool = False, parse_bracket: bool = False 3106 ) -> t.Optional[exp.Join]: 3107 if self._match(TokenType.COMMA): 3108 return self.expression(exp.Join, this=self._parse_table()) 3109 3110 index = self._index 3111 method, side, kind = self._parse_join_parts() 3112 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3113 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3114 3115 if not skip_join_token and not join: 3116 self._retreat(index) 3117 kind = None 3118 method = None 3119 side = None 3120 3121 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3122 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3123 3124 if not skip_join_token and not join and not outer_apply and not cross_apply: 3125 return None 3126 3127 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3128 3129 if method: 3130 kwargs["method"] = method.text 3131 if side: 3132 kwargs["side"] = side.text 3133 if kind: 3134 kwargs["kind"] = kind.text 3135 if hint: 3136 kwargs["hint"] = hint 3137 3138 if self._match(TokenType.MATCH_CONDITION): 3139 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3140 3141 if self._match(TokenType.ON): 3142 kwargs["on"] = self._parse_assignment() 3143 elif self._match(TokenType.USING): 3144 kwargs["using"] = self._parse_wrapped_id_vars() 3145 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3146 kind and kind.token_type == TokenType.CROSS 3147 ): 3148 index = self._index 3149 joins: t.Optional[list] = list(self._parse_joins()) 3150 3151 if joins and self._match(TokenType.ON): 3152 kwargs["on"] = self._parse_assignment() 3153 elif joins and self._match(TokenType.USING): 3154 kwargs["using"] = self._parse_wrapped_id_vars() 3155 else: 3156 joins = None 3157 self._retreat(index) 3158 3159 kwargs["this"].set("joins", joins if joins else None) 3160 3161 comments = [c for token in (method, side, kind) if token for c in token.comments] 3162 return self.expression(exp.Join, comments=comments, **kwargs) 3163 3164 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3165 this = self._parse_assignment() 3166 3167 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3168 return this 3169 3170 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3171 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3172 3173 return this 3174 3175 def _parse_index_params(self) -> exp.IndexParameters: 3176 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3177 3178 if self._match(TokenType.L_PAREN, advance=False): 3179 columns = self._parse_wrapped_csv(self._parse_with_operator) 3180 else: 3181 columns = None 3182 3183 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3184 partition_by = 
self._parse_partition_by() 3185 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3186 tablespace = ( 3187 self._parse_var(any_token=True) 3188 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3189 else None 3190 ) 3191 where = self._parse_where() 3192 3193 on = self._parse_field() if self._match(TokenType.ON) else None 3194 3195 return self.expression( 3196 exp.IndexParameters, 3197 using=using, 3198 columns=columns, 3199 include=include, 3200 partition_by=partition_by, 3201 where=where, 3202 with_storage=with_storage, 3203 tablespace=tablespace, 3204 on=on, 3205 ) 3206 3207 def _parse_index( 3208 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3209 ) -> t.Optional[exp.Index]: 3210 if index or anonymous: 3211 unique = None 3212 primary = None 3213 amp = None 3214 3215 self._match(TokenType.ON) 3216 self._match(TokenType.TABLE) # hive 3217 table = self._parse_table_parts(schema=True) 3218 else: 3219 unique = self._match(TokenType.UNIQUE) 3220 primary = self._match_text_seq("PRIMARY") 3221 amp = self._match_text_seq("AMP") 3222 3223 if not self._match(TokenType.INDEX): 3224 return None 3225 3226 index = self._parse_id_var() 3227 table = None 3228 3229 params = self._parse_index_params() 3230 3231 return self.expression( 3232 exp.Index, 3233 this=index, 3234 table=table, 3235 unique=unique, 3236 primary=primary, 3237 amp=amp, 3238 params=params, 3239 ) 3240 3241 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3242 hints: t.List[exp.Expression] = [] 3243 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3244 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3245 hints.append( 3246 self.expression( 3247 exp.WithTableHint, 3248 expressions=self._parse_csv( 3249 lambda: self._parse_function() or self._parse_var(any_token=True) 3250 ), 3251 ) 3252 ) 3253 self._match_r_paren() 3254 else: 3255 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3256 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3257 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3258 3259 self._match_set((TokenType.INDEX, TokenType.KEY)) 3260 if self._match(TokenType.FOR): 3261 hint.set("target", self._advance_any() and self._prev.text.upper()) 3262 3263 hint.set("expressions", self._parse_wrapped_id_vars()) 3264 hints.append(hint) 3265 3266 return hints or None 3267 3268 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3269 return ( 3270 (not schema and self._parse_function(optional_parens=False)) 3271 or self._parse_id_var(any_token=False) 3272 or self._parse_string_as_identifier() 3273 or self._parse_placeholder() 3274 ) 3275 3276 def _parse_table_parts( 3277 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3278 ) -> exp.Table: 3279 catalog = None 3280 db = None 3281 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3282 3283 while self._match(TokenType.DOT): 3284 if catalog: 3285 # This allows nesting the table in arbitrarily many dot expressions if needed 3286 table = self.expression( 3287 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3288 ) 3289 else: 3290 catalog = db 3291 db = table 3292 # "" used for tsql FROM a..b case 3293 table = self._parse_table_part(schema=schema) or "" 3294 3295 if ( 3296 wildcard 3297 and self._is_connected() 3298 and (isinstance(table, exp.Identifier) or not table) 3299 and self._match(TokenType.STAR) 3300 ): 3301 if 
isinstance(table, exp.Identifier): 3302 table.args["this"] += "*" 3303 else: 3304 table = exp.Identifier(this="*") 3305 3306 # We bubble up comments from the Identifier to the Table 3307 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3308 3309 if is_db_reference: 3310 catalog = db 3311 db = table 3312 table = None 3313 3314 if not table and not is_db_reference: 3315 self.raise_error(f"Expected table name but got {self._curr}") 3316 if not db and is_db_reference: 3317 self.raise_error(f"Expected database name but got {self._curr}") 3318 3319 return self.expression( 3320 exp.Table, 3321 comments=comments, 3322 this=table, 3323 db=db, 3324 catalog=catalog, 3325 pivots=self._parse_pivots(), 3326 ) 3327 3328 def _parse_table( 3329 self, 3330 schema: bool = False, 3331 joins: bool = False, 3332 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3333 parse_bracket: bool = False, 3334 is_db_reference: bool = False, 3335 parse_partition: bool = False, 3336 ) -> t.Optional[exp.Expression]: 3337 lateral = self._parse_lateral() 3338 if lateral: 3339 return lateral 3340 3341 unnest = self._parse_unnest() 3342 if unnest: 3343 return unnest 3344 3345 values = self._parse_derived_table_values() 3346 if values: 3347 return values 3348 3349 subquery = self._parse_select(table=True) 3350 if subquery: 3351 if not subquery.args.get("pivots"): 3352 subquery.set("pivots", self._parse_pivots()) 3353 return subquery 3354 3355 bracket = parse_bracket and self._parse_bracket(None) 3356 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3357 3358 only = self._match(TokenType.ONLY) 3359 3360 this = t.cast( 3361 exp.Expression, 3362 bracket 3363 or self._parse_bracket( 3364 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3365 ), 3366 ) 3367 3368 if only: 3369 this.set("only", only) 3370 3371 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3372 self._match_text_seq("*") 3373 3374 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3375 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3376 this.set("partition", self._parse_partition()) 3377 3378 if schema: 3379 return self._parse_schema(this=this) 3380 3381 version = self._parse_version() 3382 3383 if version: 3384 this.set("version", version) 3385 3386 if self.dialect.ALIAS_POST_TABLESAMPLE: 3387 table_sample = self._parse_table_sample() 3388 3389 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3390 if alias: 3391 this.set("alias", alias) 3392 3393 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3394 return self.expression( 3395 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3396 ) 3397 3398 this.set("hints", self._parse_table_hints()) 3399 3400 if not this.args.get("pivots"): 3401 this.set("pivots", self._parse_pivots()) 3402 3403 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3404 table_sample = self._parse_table_sample() 3405 3406 if table_sample: 3407 table_sample.set("this", this) 3408 this = table_sample 3409 3410 if joins: 3411 for join in self._parse_joins(): 3412 this.append("joins", join) 3413 3414 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3415 this.set("ordinality", True) 3416 this.set("alias", self._parse_table_alias()) 3417 3418 return this 3419 3420 def _parse_version(self) -> t.Optional[exp.Version]: 3421 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3422 this = "TIMESTAMP" 3423 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3424 this = "VERSION" 3425 else: 3426 return None 3427 3428 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3429 kind = self._prev.text.upper() 3430 start = self._parse_bitwise() 3431 self._match_texts(("TO", "AND")) 3432 end = self._parse_bitwise() 3433 expression: t.Optional[exp.Expression] = self.expression( 3434 exp.Tuple, expressions=[start, end] 3435 ) 3436 elif self._match_text_seq("CONTAINED", "IN"): 3437 kind = "CONTAINED IN" 3438 expression = self.expression( 3439 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3440 ) 3441 elif self._match(TokenType.ALL): 3442 kind = "ALL" 3443 expression = None 3444 else: 3445 self._match_text_seq("AS", "OF") 3446 kind = "AS OF" 3447 expression = self._parse_type() 3448 3449 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3450 3451 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3452 if not self._match(TokenType.UNNEST): 3453 return None 3454 3455 expressions = self._parse_wrapped_csv(self._parse_equality) 3456 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3457 3458 alias = self._parse_table_alias() if with_alias else None 3459 3460 if alias: 3461 if self.dialect.UNNEST_COLUMN_ONLY: 3462 if alias.args.get("columns"): 3463 self.raise_error("Unexpected extra column alias in unnest.") 3464 3465 alias.set("columns", [alias.this]) 3466 alias.set("this", None) 3467 3468 columns = alias.args.get("columns") or [] 3469 if offset and len(expressions) < len(columns): 3470 offset = columns.pop() 3471 3472 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3473 self._match(TokenType.ALIAS) 3474 offset = self._parse_id_var( 3475 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3476 ) or exp.to_identifier("offset") 3477 3478 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3479 3480 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3481 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3482 if not is_derived and not self._match_text_seq("VALUES"): 3483 return None 3484 3485 expressions = self._parse_csv(self._parse_value) 3486 alias = self._parse_table_alias() 3487 3488 if is_derived: 3489 self._match_r_paren() 3490 3491 return self.expression( 3492 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3493 ) 3494 3495 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3496 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3497 as_modifier and self._match_text_seq("USING", "SAMPLE") 3498 ): 3499 return None 3500 3501 bucket_numerator = None 3502 bucket_denominator = None 3503 bucket_field = None 3504 percent = None 3505 size = None 3506 seed = None 3507 3508 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3509 matched_l_paren = self._match(TokenType.L_PAREN) 3510 3511 if self.TABLESAMPLE_CSV: 3512 num = None 3513 expressions = self._parse_csv(self._parse_primary) 3514 else: 3515 expressions = None 3516 num = ( 3517 self._parse_factor() 3518 if self._match(TokenType.NUMBER, advance=False) 3519 else self._parse_primary() or self._parse_placeholder() 3520 ) 3521 3522 if self._match_text_seq("BUCKET"): 3523 bucket_numerator = self._parse_number() 3524 self._match_text_seq("OUT", "OF") 3525 bucket_denominator = self._parse_number() 3526 self._match(TokenType.ON) 3527 bucket_field = self._parse_field() 3528 elif
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3529 percent = num 3530 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3531 size = num 3532 else: 3533 percent = num 3534 3535 if matched_l_paren: 3536 self._match_r_paren() 3537 3538 if self._match(TokenType.L_PAREN): 3539 method = self._parse_var(upper=True) 3540 seed = self._match(TokenType.COMMA) and self._parse_number() 3541 self._match_r_paren() 3542 elif self._match_texts(("SEED", "REPEATABLE")): 3543 seed = self._parse_wrapped(self._parse_number) 3544 3545 if not method and self.DEFAULT_SAMPLING_METHOD: 3546 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3547 3548 return self.expression( 3549 exp.TableSample, 3550 expressions=expressions, 3551 method=method, 3552 bucket_numerator=bucket_numerator, 3553 bucket_denominator=bucket_denominator, 3554 bucket_field=bucket_field, 3555 percent=percent, 3556 size=size, 3557 seed=seed, 3558 ) 3559 3560 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3561 return list(iter(self._parse_pivot, None)) or None 3562 3563 def _parse_joins(self) -> t.Iterator[exp.Join]: 3564 return iter(self._parse_join, None) 3565 3566 # https://duckdb.org/docs/sql/statements/pivot 3567 def _parse_simplified_pivot(self) -> exp.Pivot: 3568 def _parse_on() -> t.Optional[exp.Expression]: 3569 this = self._parse_bitwise() 3570 return self._parse_in(this) if self._match(TokenType.IN) else this 3571 3572 this = self._parse_table() 3573 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3574 using = self._match(TokenType.USING) and self._parse_csv( 3575 lambda: self._parse_alias(self._parse_function()) 3576 ) 3577 group = self._parse_group() 3578 return self.expression( 3579 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3580 ) 3581 3582 def _parse_pivot_in(self) -> exp.In: 3583 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3584 this = self._parse_assignment() 3585 3586 self._match(TokenType.ALIAS) 3587 alias = self._parse_field() 3588 if alias: 3589 return self.expression(exp.PivotAlias, this=this, alias=alias) 3590 3591 return this 3592 3593 value = self._parse_column() 3594 3595 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3596 self.raise_error("Expecting IN (") 3597 3598 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3599 3600 self._match_r_paren() 3601 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3602 3603 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3604 index = self._index 3605 include_nulls = None 3606 3607 if self._match(TokenType.PIVOT): 3608 unpivot = False 3609 elif self._match(TokenType.UNPIVOT): 3610 unpivot = True 3611 3612 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3613 if self._match_text_seq("INCLUDE", "NULLS"): 3614 include_nulls = True 3615 elif self._match_text_seq("EXCLUDE", "NULLS"): 3616 include_nulls = False 3617 else: 3618 return None 3619 3620 expressions = [] 3621 3622 if not self._match(TokenType.L_PAREN): 3623 self._retreat(index) 3624 return None 3625 3626 if unpivot: 3627 expressions = self._parse_csv(self._parse_column) 3628 else: 3629 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3630 3631 if not expressions: 3632 self.raise_error("Failed to parse PIVOT's aggregation list") 3633 3634 if not self._match(TokenType.FOR): 3635 self.raise_error("Expecting FOR") 3636 3637 field = self._parse_pivot_in() 3638 3639 self._match_r_paren() 
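# Commentary (not from the source): for a query such as
#   SELECT * FROM t PIVOT(SUM(v) AS s FOR k IN ('a', 'b'))
# the block below derives the pivot's output column names by combining each
# aggregation alias with each IN value ("s_a" when PREFIXED_PIVOT_COLUMNS is
# set, otherwise "a_s"); the identifiers used here are hypothetical.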
3640 3641 pivot = self.expression( 3642 exp.Pivot, 3643 expressions=expressions, 3644 field=field, 3645 unpivot=unpivot, 3646 include_nulls=include_nulls, 3647 ) 3648 3649 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3650 pivot.set("alias", self._parse_table_alias()) 3651 3652 if not unpivot: 3653 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3654 3655 columns: t.List[exp.Expression] = [] 3656 for fld in pivot.args["field"].expressions: 3657 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3658 for name in names: 3659 if self.PREFIXED_PIVOT_COLUMNS: 3660 name = f"{name}_{field_name}" if name else field_name 3661 else: 3662 name = f"{field_name}_{name}" if name else field_name 3663 3664 columns.append(exp.to_identifier(name)) 3665 3666 pivot.set("columns", columns) 3667 3668 return pivot 3669 3670 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3671 return [agg.alias for agg in aggregations] 3672 3673 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3674 if not skip_where_token and not self._match(TokenType.PREWHERE): 3675 return None 3676 3677 return self.expression( 3678 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3679 ) 3680 3681 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3682 if not skip_where_token and not self._match(TokenType.WHERE): 3683 return None 3684 3685 return self.expression( 3686 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3687 ) 3688 3689 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3690 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3691 return None 3692 3693 elements: t.Dict[str, t.Any] = defaultdict(list) 3694 3695 if self._match(TokenType.ALL): 3696 elements["all"] = True 3697 elif self._match(TokenType.DISTINCT): 3698 elements["all"] = False 3699 3700 while True: 3701 expressions = self._parse_csv( 3702 lambda: None 3703 if self._match(TokenType.ROLLUP, advance=False) 3704 else self._parse_assignment() 3705 ) 3706 if expressions: 3707 elements["expressions"].extend(expressions) 3708 3709 grouping_sets = self._parse_grouping_sets() 3710 if grouping_sets: 3711 elements["grouping_sets"].extend(grouping_sets) 3712 3713 rollup = None 3714 cube = None 3715 totals = None 3716 3717 index = self._index 3718 with_ = self._match(TokenType.WITH) 3719 if self._match(TokenType.ROLLUP): 3720 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3721 elements["rollup"].extend(ensure_list(rollup)) 3722 3723 if self._match(TokenType.CUBE): 3724 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3725 elements["cube"].extend(ensure_list(cube)) 3726 3727 if self._match_text_seq("TOTALS"): 3728 totals = True 3729 elements["totals"] = True # type: ignore 3730 3731 if not (grouping_sets or rollup or cube or totals): 3732 if with_: 3733 self._retreat(index) 3734 break 3735 3736 return self.expression(exp.Group, **elements) # type: ignore 3737 3738 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3739 if not self._match(TokenType.GROUPING_SETS): 3740 return None 3741 3742 return self._parse_wrapped_csv(self._parse_grouping_set) 3743 3744 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3745 if self._match(TokenType.L_PAREN): 3746 grouping_set = self._parse_csv(self._parse_column) 3747 self._match_r_paren() 3748 return 
self.expression(exp.Tuple, expressions=grouping_set) 3749 3750 return self._parse_column() 3751 3752 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3753 if not skip_having_token and not self._match(TokenType.HAVING): 3754 return None 3755 return self.expression(exp.Having, this=self._parse_assignment()) 3756 3757 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3758 if not self._match(TokenType.QUALIFY): 3759 return None 3760 return self.expression(exp.Qualify, this=self._parse_assignment()) 3761 3762 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3763 if skip_start_token: 3764 start = None 3765 elif self._match(TokenType.START_WITH): 3766 start = self._parse_assignment() 3767 else: 3768 return None 3769 3770 self._match(TokenType.CONNECT_BY) 3771 nocycle = self._match_text_seq("NOCYCLE") 3772 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3773 exp.Prior, this=self._parse_bitwise() 3774 ) 3775 connect = self._parse_assignment() 3776 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3777 3778 if not start and self._match(TokenType.START_WITH): 3779 start = self._parse_assignment() 3780 3781 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3782 3783 def _parse_name_as_expression(self) -> exp.Alias: 3784 return self.expression( 3785 exp.Alias, 3786 alias=self._parse_id_var(any_token=True), 3787 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3788 ) 3789 3790 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3791 if self._match_text_seq("INTERPOLATE"): 3792 return self._parse_wrapped_csv(self._parse_name_as_expression) 3793 return None 3794 3795 def _parse_order( 3796 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3797 ) -> t.Optional[exp.Expression]: 3798 siblings = None 3799 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3800 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3801 return this 3802 3803 siblings = True 3804 3805 return self.expression( 3806 exp.Order, 3807 this=this, 3808 expressions=self._parse_csv(self._parse_ordered), 3809 interpolate=self._parse_interpolate(), 3810 siblings=siblings, 3811 ) 3812 3813 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3814 if not self._match(token): 3815 return None 3816 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3817 3818 def _parse_ordered( 3819 self, parse_method: t.Optional[t.Callable] = None 3820 ) -> t.Optional[exp.Ordered]: 3821 this = parse_method() if parse_method else self._parse_assignment() 3822 if not this: 3823 return None 3824 3825 asc = self._match(TokenType.ASC) 3826 desc = self._match(TokenType.DESC) or (asc and False) 3827 3828 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3829 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3830 3831 nulls_first = is_nulls_first or False 3832 explicitly_null_ordered = is_nulls_first or is_nulls_last 3833 3834 if ( 3835 not explicitly_null_ordered 3836 and ( 3837 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3838 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3839 ) 3840 and self.dialect.NULL_ORDERING != "nulls_are_last" 3841 ): 3842 nulls_first = True 3843 3844 if self._match_text_seq("WITH", "FILL"): 3845 with_fill = self.expression( 3846 exp.WithFill, 3847 **{ # type: ignore 3848 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3849 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3850 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3851 }, 3852 ) 3853 else: 3854 with_fill = None 3855 3856 return self.expression( 3857 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3858 ) 3859 3860 def _parse_limit( 3861 self, 3862 this: t.Optional[exp.Expression] = None, 3863 top: bool = False, 3864 skip_limit_token: bool = False, 3865 ) -> t.Optional[exp.Expression]: 3866 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3867 comments = self._prev_comments 3868 if top: 3869 limit_paren = self._match(TokenType.L_PAREN) 3870 expression = self._parse_term() if limit_paren else self._parse_number() 3871 3872 if limit_paren: 3873 self._match_r_paren() 3874 else: 3875 expression = self._parse_term() 3876 3877 if self._match(TokenType.COMMA): 3878 offset = expression 3879 expression = self._parse_term() 3880 else: 3881 offset = None 3882 3883 limit_exp = self.expression( 3884 exp.Limit, 3885 this=this, 3886 expression=expression, 3887 offset=offset, 3888 comments=comments, 3889 expressions=self._parse_limit_by(), 3890 ) 3891 3892 return limit_exp 3893 3894 if self._match(TokenType.FETCH): 3895 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3896 direction = self._prev.text.upper() if direction else "FIRST" 3897 3898 count = self._parse_field(tokens=self.FETCH_TOKENS) 3899 percent = self._match(TokenType.PERCENT) 3900 3901 self._match_set((TokenType.ROW, TokenType.ROWS)) 3902 3903 only = self._match_text_seq("ONLY") 3904 with_ties = self._match_text_seq("WITH", "TIES") 3905 3906 if only and with_ties: 3907 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3908 3909 return self.expression( 3910 exp.Fetch, 3911 direction=direction, 3912 count=count, 3913 percent=percent, 3914 with_ties=with_ties, 3915 ) 3916 3917 return this 3918 3919 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3920 if not self._match(TokenType.OFFSET): 3921 return this 3922 3923 count = self._parse_term() 3924 self._match_set((TokenType.ROW, TokenType.ROWS)) 3925 3926 return self.expression( 3927 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3928 ) 3929 3930 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3931 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3932 3933 def _parse_locks(self) -> t.List[exp.Lock]: 3934 locks = [] 3935 while True: 3936 if self._match_text_seq("FOR", "UPDATE"): 3937 update = True 3938 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3939 "LOCK", "IN", "SHARE", "MODE" 3940 ): 3941 update = False 3942 else: 3943 break 3944 3945 expressions = None 3946 if self._match_text_seq("OF"): 3947 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3948 3949 wait: t.Optional[bool | exp.Expression] = None 3950 if self._match_text_seq("NOWAIT"): 3951 wait = True 3952 elif self._match_text_seq("WAIT"): 3953 wait = self._parse_primary() 3954 elif self._match_text_seq("SKIP", "LOCKED"): 3955 wait = False 3956 3957 locks.append( 3958 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3959 ) 3960 3961 return locks 3962 3963 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3964 while this and self._match_set(self.SET_OPERATIONS): 3965 token_type = self._prev.token_type 3966 3967 if token_type == TokenType.UNION: 3968 
operation: t.Type[exp.SetOperation] = exp.Union 3969 elif token_type == TokenType.EXCEPT: 3970 operation = exp.Except 3971 else: 3972 operation = exp.Intersect 3973 3974 comments = self._prev.comments 3975 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3976 by_name = self._match_text_seq("BY", "NAME") 3977 expression = self._parse_select(nested=True, parse_set_operation=False) 3978 3979 this = self.expression( 3980 operation, 3981 comments=comments, 3982 this=this, 3983 distinct=distinct, 3984 by_name=by_name, 3985 expression=expression, 3986 ) 3987 3988 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3989 expression = this.expression 3990 3991 if expression: 3992 for arg in self.SET_OP_MODIFIERS: 3993 expr = expression.args.get(arg) 3994 if expr: 3995 this.set(arg, expr.pop()) 3996 3997 return this 3998 3999 def _parse_expression(self) -> t.Optional[exp.Expression]: 4000 return self._parse_alias(self._parse_assignment()) 4001 4002 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4003 this = self._parse_disjunction() 4004 4005 while self._match_set(self.ASSIGNMENT): 4006 this = self.expression( 4007 self.ASSIGNMENT[self._prev.token_type], 4008 this=this, 4009 comments=self._prev_comments, 4010 expression=self._parse_assignment(), 4011 ) 4012 4013 return this 4014 4015 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4016 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4017 4018 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4019 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4020 4021 def _parse_equality(self) -> t.Optional[exp.Expression]: 4022 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4023 4024 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4025 return self._parse_tokens(self._parse_range, self.COMPARISON) 4026 4027 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4028 this = this or self._parse_bitwise() 4029 negate = self._match(TokenType.NOT) 4030 4031 if self._match_set(self.RANGE_PARSERS): 4032 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4033 if not expression: 4034 return this 4035 4036 this = expression 4037 elif self._match(TokenType.ISNULL): 4038 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4039 4040 # Postgres supports ISNULL and NOTNULL for conditions. 
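# e.g. `x NOTNULL` is rewritten just below into NOT (x IS NULL), mirroring how
# `x ISNULL` above became x IS NULL (an illustrative reading of this branch).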
4041 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4042 if self._match(TokenType.NOTNULL): 4043 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4044 this = self.expression(exp.Not, this=this) 4045 4046 if negate: 4047 this = self.expression(exp.Not, this=this) 4048 4049 if self._match(TokenType.IS): 4050 this = self._parse_is(this) 4051 4052 return this 4053 4054 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4055 index = self._index - 1 4056 negate = self._match(TokenType.NOT) 4057 4058 if self._match_text_seq("DISTINCT", "FROM"): 4059 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4060 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4061 4062 expression = self._parse_null() or self._parse_boolean() 4063 if not expression: 4064 self._retreat(index) 4065 return None 4066 4067 this = self.expression(exp.Is, this=this, expression=expression) 4068 return self.expression(exp.Not, this=this) if negate else this 4069 4070 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4071 unnest = self._parse_unnest(with_alias=False) 4072 if unnest: 4073 this = self.expression(exp.In, this=this, unnest=unnest) 4074 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4075 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4076 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4077 4078 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4079 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4080 else: 4081 this = self.expression(exp.In, this=this, expressions=expressions) 4082 4083 if matched_l_paren: 4084 self._match_r_paren(this) 4085 elif not self._match(TokenType.R_BRACKET, expression=this): 4086 self.raise_error("Expecting ]") 4087 else: 4088 this = self.expression(exp.In, this=this, field=self._parse_field()) 4089 4090 return this 4091 4092 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4093 low = self._parse_bitwise() 4094 self._match(TokenType.AND) 4095 high = self._parse_bitwise() 4096 return self.expression(exp.Between, this=this, low=low, high=high) 4097 4098 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4099 if not self._match(TokenType.ESCAPE): 4100 return this 4101 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4102 4103 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4104 index = self._index 4105 4106 if not self._match(TokenType.INTERVAL) and match_interval: 4107 return None 4108 4109 if self._match(TokenType.STRING, advance=False): 4110 this = self._parse_primary() 4111 else: 4112 this = self._parse_term() 4113 4114 if not this or ( 4115 isinstance(this, exp.Column) 4116 and not this.table 4117 and not this.this.quoted 4118 and this.name.upper() == "IS" 4119 ): 4120 self._retreat(index) 4121 return None 4122 4123 unit = self._parse_function() or ( 4124 not self._match(TokenType.ALIAS, advance=False) 4125 and self._parse_var(any_token=True, upper=True) 4126 ) 4127 4128 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4129 # each INTERVAL expression into this canonical form so it's easy to transpile 4130 if this and this.is_number: 4131 this = exp.Literal.string(this.name) 4132 elif this and this.is_string: 4133 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4134 if 
len(parts) == 1: 4135 if unit: 4136 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4137 self._retreat(self._index - 1) 4138 4139 this = exp.Literal.string(parts[0][0]) 4140 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4141 4142 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4143 unit = self.expression( 4144 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4145 ) 4146 4147 interval = self.expression(exp.Interval, this=this, unit=unit) 4148 4149 index = self._index 4150 self._match(TokenType.PLUS) 4151 4152 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4153 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4154 return self.expression( 4155 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4156 ) 4157 4158 self._retreat(index) 4159 return interval 4160 4161 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4162 this = self._parse_term() 4163 4164 while True: 4165 if self._match_set(self.BITWISE): 4166 this = self.expression( 4167 self.BITWISE[self._prev.token_type], 4168 this=this, 4169 expression=self._parse_term(), 4170 ) 4171 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4172 this = self.expression( 4173 exp.DPipe, 4174 this=this, 4175 expression=self._parse_term(), 4176 safe=not self.dialect.STRICT_STRING_CONCAT, 4177 ) 4178 elif self._match(TokenType.DQMARK): 4179 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4180 elif self._match_pair(TokenType.LT, TokenType.LT): 4181 this = self.expression( 4182 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4183 ) 4184 elif self._match_pair(TokenType.GT, TokenType.GT): 4185 this = self.expression( 4186 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4187 ) 4188 else: 4189 break 4190 4191 return this 4192 4193 def _parse_term(self) -> t.Optional[exp.Expression]: 4194 return self._parse_tokens(self._parse_factor, self.TERM) 4195 4196 def _parse_factor(self) -> t.Optional[exp.Expression]: 4197 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4198 this = parse_method() 4199 4200 while self._match_set(self.FACTOR): 4201 klass = self.FACTOR[self._prev.token_type] 4202 comments = self._prev_comments 4203 expression = parse_method() 4204 4205 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4206 self._retreat(self._index - 1) 4207 return this 4208 4209 this = self.expression(klass, this=this, comments=comments, expression=expression) 4210 4211 if isinstance(this, exp.Div): 4212 this.args["typed"] = self.dialect.TYPED_DIVISION 4213 this.args["safe"] = self.dialect.SAFE_DIVISION 4214 4215 return this 4216 4217 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4218 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4219 4220 def _parse_unary(self) -> t.Optional[exp.Expression]: 4221 if self._match_set(self.UNARY_PARSERS): 4222 return self.UNARY_PARSERS[self._prev.token_type](self) 4223 return self._parse_at_time_zone(self._parse_type()) 4224 4225 def _parse_type( 4226 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4227 ) -> t.Optional[exp.Expression]: 4228 interval = parse_interval and self._parse_interval() 4229 if interval: 4230 return interval 4231 4232 index = self._index 4233 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4234 4235 if data_type: 4236 index2 = 
self._index 4237 this = self._parse_primary() 4238 4239 if isinstance(this, exp.Literal): 4240 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4241 if parser: 4242 return parser(self, this, data_type) 4243 4244 return self.expression(exp.Cast, this=this, to=data_type) 4245 4246 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4247 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4248 # 4249 # If the index difference here is greater than 1, that means the parser itself must have 4250 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4251 # 4252 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4253 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4254 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4255 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4256 # 4257 # In these cases, we don't really want to return the converted type, but instead retreat 4258 # and try to parse a Column or Identifier in the section below. 4259 if data_type.expressions and index2 - index > 1: 4260 self._retreat(index2) 4261 return self._parse_column_ops(data_type) 4262 4263 self._retreat(index) 4264 4265 if fallback_to_identifier: 4266 return self._parse_id_var() 4267 4268 this = self._parse_column() 4269 return this and self._parse_column_ops(this) 4270 4271 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4272 this = self._parse_type() 4273 if not this: 4274 return None 4275 4276 if isinstance(this, exp.Column) and not this.table: 4277 this = exp.var(this.name.upper()) 4278 4279 return self.expression( 4280 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4281 ) 4282 4283 def _parse_types( 4284 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4285 ) -> t.Optional[exp.Expression]: 4286 index = self._index 4287 4288 this: t.Optional[exp.Expression] = None 4289 prefix = self._match_text_seq("SYSUDTLIB", ".") 4290 4291 if not self._match_set(self.TYPE_TOKENS): 4292 identifier = allow_identifiers and self._parse_id_var( 4293 any_token=False, tokens=(TokenType.VAR,) 4294 ) 4295 if identifier: 4296 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4297 4298 if len(tokens) != 1: 4299 self.raise_error("Unexpected identifier", self._prev) 4300 4301 if tokens[0].token_type in self.TYPE_TOKENS: 4302 self._prev = tokens[0] 4303 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4304 type_name = identifier.name 4305 4306 while self._match(TokenType.DOT): 4307 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4308 4309 this = exp.DataType.build(type_name, udt=True) 4310 else: 4311 self._retreat(self._index - 1) 4312 return None 4313 else: 4314 return None 4315 4316 type_token = self._prev.token_type 4317 4318 if type_token == TokenType.PSEUDO_TYPE: 4319 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4320 4321 if type_token == TokenType.OBJECT_IDENTIFIER: 4322 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4323 4324 # https://materialize.com/docs/sql/types/map/ 4325 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4326 key_type = self._parse_types( 4327 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4328 ) 4329 if not self._match(TokenType.FARROW): 4330 self._retreat(index)
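# No `=>` after the key type, so this is not Materialize's MAP[K => V] syntax
# after all; the position was rewound above, so bail out.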
4331 return None 4332 4333 value_type = self._parse_types( 4334 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4335 ) 4336 if not self._match(TokenType.R_BRACKET): 4337 self._retreat(index) 4338 return None 4339 4340 return exp.DataType( 4341 this=exp.DataType.Type.MAP, 4342 expressions=[key_type, value_type], 4343 nested=True, 4344 prefix=prefix, 4345 ) 4346 4347 nested = type_token in self.NESTED_TYPE_TOKENS 4348 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4349 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4350 expressions = None 4351 maybe_func = False 4352 4353 if self._match(TokenType.L_PAREN): 4354 if is_struct: 4355 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4356 elif nested: 4357 expressions = self._parse_csv( 4358 lambda: self._parse_types( 4359 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4360 ) 4361 ) 4362 elif type_token in self.ENUM_TYPE_TOKENS: 4363 expressions = self._parse_csv(self._parse_equality) 4364 elif is_aggregate: 4365 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4366 any_token=False, tokens=(TokenType.VAR,) 4367 ) 4368 if not func_or_ident or not self._match(TokenType.COMMA): 4369 return None 4370 expressions = self._parse_csv( 4371 lambda: self._parse_types( 4372 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4373 ) 4374 ) 4375 expressions.insert(0, func_or_ident) 4376 else: 4377 expressions = self._parse_csv(self._parse_type_size) 4378 4379 if not expressions or not self._match(TokenType.R_PAREN): 4380 self._retreat(index) 4381 return None 4382 4383 maybe_func = True 4384 4385 values: t.Optional[t.List[exp.Expression]] = None 4386 4387 if nested and self._match(TokenType.LT): 4388 if is_struct: 4389 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4390 else: 4391 expressions = self._parse_csv( 4392 lambda: self._parse_types( 4393 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4394 ) 4395 ) 4396 4397 if not self._match(TokenType.GT): 4398 self.raise_error("Expecting >") 4399 4400 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4401 values = self._parse_csv(self._parse_assignment) 4402 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4403 4404 if type_token in self.TIMESTAMPS: 4405 if self._match_text_seq("WITH", "TIME", "ZONE"): 4406 maybe_func = False 4407 tz_type = ( 4408 exp.DataType.Type.TIMETZ 4409 if type_token in self.TIMES 4410 else exp.DataType.Type.TIMESTAMPTZ 4411 ) 4412 this = exp.DataType(this=tz_type, expressions=expressions) 4413 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4414 maybe_func = False 4415 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4416 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4417 maybe_func = False 4418 elif type_token == TokenType.INTERVAL: 4419 unit = self._parse_var(upper=True) 4420 if unit: 4421 if self._match_text_seq("TO"): 4422 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4423 4424 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4425 else: 4426 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4427 4428 if maybe_func and check_func: 4429 index2 = self._index 4430 peek = self._parse_string() 4431 4432 if not peek: 4433 self._retreat(index) 4434 return None 4435 4436 self._retreat(index2) 4437 4438 if not this: 4439 if 
self._match_text_seq("UNSIGNED"): 4440 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4441 if not unsigned_type_token: 4442 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4443 4444 type_token = unsigned_type_token or type_token 4445 4446 this = exp.DataType( 4447 this=exp.DataType.Type[type_token.value], 4448 expressions=expressions, 4449 nested=nested, 4450 values=values, 4451 prefix=prefix, 4452 ) 4453 elif expressions: 4454 this.set("expressions", expressions) 4455 4456 # https://materialize.com/docs/sql/types/list/#type-name 4457 while self._match(TokenType.LIST): 4458 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4459 4460 index = self._index 4461 4462 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4463 matched_array = self._match(TokenType.ARRAY) 4464 4465 while self._curr: 4466 matched_l_bracket = self._match(TokenType.L_BRACKET) 4467 if not matched_l_bracket and not matched_array: 4468 break 4469 4470 matched_array = False 4471 values = self._parse_csv(self._parse_assignment) or None 4472 if values and not schema: 4473 self._retreat(index) 4474 break 4475 4476 this = exp.DataType( 4477 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4478 ) 4479 self._match(TokenType.R_BRACKET) 4480 4481 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4482 converter = self.TYPE_CONVERTERS.get(this.this) 4483 if converter: 4484 this = converter(t.cast(exp.DataType, this)) 4485 4486 return this 4487 4488 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4489 index = self._index 4490 4491 if ( 4492 self._curr 4493 and self._next 4494 and self._curr.token_type in self.TYPE_TOKENS 4495 and self._next.token_type in self.TYPE_TOKENS 4496 ): 4497 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4498 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4499 this = self._parse_id_var() 4500 else: 4501 this = ( 4502 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4503 or self._parse_id_var() 4504 ) 4505 4506 self._match(TokenType.COLON) 4507 4508 if ( 4509 type_required 4510 and not isinstance(this, exp.DataType) 4511 and not self._match_set(self.TYPE_TOKENS, advance=False) 4512 ): 4513 self._retreat(index) 4514 return self._parse_types() 4515 4516 return self._parse_column_def(this) 4517 4518 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4519 if not self._match_text_seq("AT", "TIME", "ZONE"): 4520 return this 4521 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4522 4523 def _parse_column(self) -> t.Optional[exp.Expression]: 4524 this = self._parse_column_reference() 4525 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4526 4527 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4528 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4529 4530 return column 4531 4532 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4533 this = self._parse_field() 4534 if ( 4535 not this 4536 and self._match(TokenType.VALUES, advance=False) 4537 and self.VALUES_FOLLOWED_BY_PAREN 4538 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4539 ): 4540 this = self._parse_id_var() 4541 4542 if isinstance(this, exp.Identifier): 4543 # We bubble up comments from the Identifier to the Column 4544 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4545 4546 return this 4547 4548 def _parse_colon_as_json_extract( 4549 self, this: t.Optional[exp.Expression] 4550 ) -> t.Optional[exp.Expression]: 4551 casts = [] 4552 json_path = [] 4553 4554 while self._match(TokenType.COLON): 4555 start_index = self._index 4556 4557 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4558 path = self._parse_column_ops( 4559 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4560 ) 4561 4562 # The cast :: operator has a lower precedence than the extraction operator :, so 4563 # we rearrange the AST appropriately to avoid casting the JSON path 4564 while isinstance(path, exp.Cast): 4565 casts.append(path.to) 4566 path = path.this 4567 4568 if casts: 4569 dcolon_offset = next( 4570 i 4571 for i, t in enumerate(self._tokens[start_index:]) 4572 if t.token_type == TokenType.DCOLON 4573 ) 4574 end_token = self._tokens[start_index + dcolon_offset - 1] 4575 else: 4576 end_token = self._prev 4577 4578 if path: 4579 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4580 4581 if json_path: 4582 this = self.expression( 4583 exp.JSONExtract, 4584 this=this, 4585 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4586 ) 4587 4588 while casts: 4589 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4590 4591 return this 4592 4593 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4594 this = self._parse_bracket(this) 4595 4596 while self._match_set(self.COLUMN_OPERATORS): 4597 op_token = self._prev.token_type 4598 op = self.COLUMN_OPERATORS.get(op_token) 4599 4600 if op_token == TokenType.DCOLON: 4601 field = self._parse_types() 4602 if not field: 4603 self.raise_error("Expected type") 4604 elif op and self._curr: 4605 field = self._parse_column_reference() 4606 
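# (:: above demanded a type, and other registered column operators took a
# column reference; the fallback below accepts any field, including the
# anonymous function calls handled right after — e.g. BigQuery's x.y.count(...).)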
else: 4607 field = self._parse_field(any_token=True, anonymous_func=True) 4608 4609 if isinstance(field, exp.Func) and this: 4610 # bigquery allows function calls like x.y.count(...) 4611 # SAFE.SUBSTR(...) 4612 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4613 this = exp.replace_tree( 4614 this, 4615 lambda n: ( 4616 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4617 if n.table 4618 else n.this 4619 ) 4620 if isinstance(n, exp.Column) 4621 else n, 4622 ) 4623 4624 if op: 4625 this = op(self, this, field) 4626 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4627 this = self.expression( 4628 exp.Column, 4629 this=field, 4630 table=this.this, 4631 db=this.args.get("table"), 4632 catalog=this.args.get("db"), 4633 ) 4634 else: 4635 this = self.expression(exp.Dot, this=this, expression=field) 4636 4637 this = self._parse_bracket(this) 4638 4639 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4640 4641 def _parse_primary(self) -> t.Optional[exp.Expression]: 4642 if self._match_set(self.PRIMARY_PARSERS): 4643 token_type = self._prev.token_type 4644 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4645 4646 if token_type == TokenType.STRING: 4647 expressions = [primary] 4648 while self._match(TokenType.STRING): 4649 expressions.append(exp.Literal.string(self._prev.text)) 4650 4651 if len(expressions) > 1: 4652 return self.expression(exp.Concat, expressions=expressions) 4653 4654 return primary 4655 4656 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4657 return exp.Literal.number(f"0.{self._prev.text}") 4658 4659 if self._match(TokenType.L_PAREN): 4660 comments = self._prev_comments 4661 query = self._parse_select() 4662 4663 if query: 4664 expressions = [query] 4665 else: 4666 expressions = self._parse_expressions() 4667 4668 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4669 4670 if not this and self._match(TokenType.R_PAREN, advance=False): 4671 this = self.expression(exp.Tuple) 4672 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4673 this = self._parse_subquery(this=this, parse_alias=False) 4674 elif isinstance(this, exp.Subquery): 4675 this = self._parse_subquery( 4676 this=self._parse_set_operations(this), parse_alias=False 4677 ) 4678 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4679 this = self.expression(exp.Tuple, expressions=expressions) 4680 else: 4681 this = self.expression(exp.Paren, this=this) 4682 4683 if this: 4684 this.add_comments(comments) 4685 4686 self._match_r_paren(expression=this) 4687 return this 4688 4689 return None 4690 4691 def _parse_field( 4692 self, 4693 any_token: bool = False, 4694 tokens: t.Optional[t.Collection[TokenType]] = None, 4695 anonymous_func: bool = False, 4696 ) -> t.Optional[exp.Expression]: 4697 if anonymous_func: 4698 field = ( 4699 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4700 or self._parse_primary() 4701 ) 4702 else: 4703 field = self._parse_primary() or self._parse_function( 4704 anonymous=anonymous_func, any_token=any_token 4705 ) 4706 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4707 4708 def _parse_function( 4709 self, 4710 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4711 anonymous: bool = False, 4712 optional_parens: bool = True, 4713 any_token: bool = False, 4714 ) -> t.Optional[exp.Expression]: 4715 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support 
this) 4716 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4717 fn_syntax = False 4718 if ( 4719 self._match(TokenType.L_BRACE, advance=False) 4720 and self._next 4721 and self._next.text.upper() == "FN" 4722 ): 4723 self._advance(2) 4724 fn_syntax = True 4725 4726 func = self._parse_function_call( 4727 functions=functions, 4728 anonymous=anonymous, 4729 optional_parens=optional_parens, 4730 any_token=any_token, 4731 ) 4732 4733 if fn_syntax: 4734 self._match(TokenType.R_BRACE) 4735 4736 return func 4737 4738 def _parse_function_call( 4739 self, 4740 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4741 anonymous: bool = False, 4742 optional_parens: bool = True, 4743 any_token: bool = False, 4744 ) -> t.Optional[exp.Expression]: 4745 if not self._curr: 4746 return None 4747 4748 comments = self._curr.comments 4749 token_type = self._curr.token_type 4750 this = self._curr.text 4751 upper = this.upper() 4752 4753 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4754 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4755 self._advance() 4756 return self._parse_window(parser(self)) 4757 4758 if not self._next or self._next.token_type != TokenType.L_PAREN: 4759 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4760 self._advance() 4761 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4762 4763 return None 4764 4765 if any_token: 4766 if token_type in self.RESERVED_TOKENS: 4767 return None 4768 elif token_type not in self.FUNC_TOKENS: 4769 return None 4770 4771 self._advance(2) 4772 4773 parser = self.FUNCTION_PARSERS.get(upper) 4774 if parser and not anonymous: 4775 this = parser(self) 4776 else: 4777 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4778 4779 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4780 this = self.expression(subquery_predicate, this=self._parse_select()) 4781 self._match_r_paren() 4782 return this 4783 4784 if functions is None: 4785 functions = self.FUNCTIONS 4786 4787 function = functions.get(upper) 4788 4789 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4790 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4791 4792 if alias: 4793 args = self._kv_to_prop_eq(args) 4794 4795 if function and not anonymous: 4796 if "dialect" in function.__code__.co_varnames: 4797 func = function(args, dialect=self.dialect) 4798 else: 4799 func = function(args) 4800 4801 func = self.validate_expression(func, args) 4802 if not self.dialect.NORMALIZE_FUNCTIONS: 4803 func.meta["name"] = this 4804 4805 this = func 4806 else: 4807 if token_type == TokenType.IDENTIFIER: 4808 this = exp.Identifier(this=this, quoted=True) 4809 this = self.expression(exp.Anonymous, this=this, expressions=args) 4810 4811 if isinstance(this, exp.Expression): 4812 this.add_comments(comments) 4813 4814 self._match_r_paren(this) 4815 return self._parse_window(this) 4816 4817 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4818 transformed = [] 4819 4820 for e in expressions: 4821 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4822 if isinstance(e, exp.Alias): 4823 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4824 4825 if not isinstance(e, exp.PropertyEQ): 4826 e = self.expression( 4827 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4828 ) 4829 4830 if isinstance(e.this, exp.Column): 4831 e.this.replace(e.this.this) 4832 4833 transformed.append(e) 4834 4835 
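# Illustrative effect (hypothetical arguments): an aliased argument `1 AS x`
# and an equality `x = 1` both normalize to PropertyEQ(this=x, expression=1),
# giving keyword-style function arguments a single canonical form downstream.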
return transformed 4836 4837 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4838 return self._parse_column_def(self._parse_id_var()) 4839 4840 def _parse_user_defined_function( 4841 self, kind: t.Optional[TokenType] = None 4842 ) -> t.Optional[exp.Expression]: 4843 this = self._parse_id_var() 4844 4845 while self._match(TokenType.DOT): 4846 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4847 4848 if not self._match(TokenType.L_PAREN): 4849 return this 4850 4851 expressions = self._parse_csv(self._parse_function_parameter) 4852 self._match_r_paren() 4853 return self.expression( 4854 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4855 ) 4856 4857 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4858 literal = self._parse_primary() 4859 if literal: 4860 return self.expression(exp.Introducer, this=token.text, expression=literal) 4861 4862 return self.expression(exp.Identifier, this=token.text) 4863 4864 def _parse_session_parameter(self) -> exp.SessionParameter: 4865 kind = None 4866 this = self._parse_id_var() or self._parse_primary() 4867 4868 if this and self._match(TokenType.DOT): 4869 kind = this.name 4870 this = self._parse_var() or self._parse_primary() 4871 4872 return self.expression(exp.SessionParameter, this=this, kind=kind) 4873 4874 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4875 return self._parse_id_var() 4876 4877 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4878 index = self._index 4879 4880 if self._match(TokenType.L_PAREN): 4881 expressions = t.cast( 4882 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4883 ) 4884 4885 if not self._match(TokenType.R_PAREN): 4886 self._retreat(index) 4887 else: 4888 expressions = [self._parse_lambda_arg()] 4889 4890 if self._match_set(self.LAMBDAS): 4891 return self.LAMBDAS[self._prev.token_type](self, expressions) 4892 4893 self._retreat(index) 4894 4895 this: t.Optional[exp.Expression] 4896 4897 if self._match(TokenType.DISTINCT): 4898 this = self.expression( 4899 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4900 ) 4901 else: 4902 this = self._parse_select_or_expression(alias=alias) 4903 4904 return self._parse_limit( 4905 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4906 ) 4907 4908 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4909 index = self._index 4910 if not self._match(TokenType.L_PAREN): 4911 return this 4912 4913 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4914 # expr can be of both types 4915 if self._match_set(self.SELECT_START_TOKENS): 4916 self._retreat(index) 4917 return this 4918 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4919 self._match_r_paren() 4920 return self.expression(exp.Schema, this=this, expressions=args) 4921 4922 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4923 return self._parse_column_def(self._parse_field(any_token=True)) 4924 4925 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4926 # column defs are not really columns, they're identifiers 4927 if isinstance(this, exp.Column): 4928 this = this.this 4929 4930 kind = self._parse_types(schema=True) 4931 4932 if self._match_text_seq("FOR", "ORDINALITY"): 4933 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4934 4935 constraints: t.List[exp.Expression] = [] 4936 4937 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4938 ("ALIAS", "MATERIALIZED") 4939 ): 4940 persisted = self._prev.text.upper() == "MATERIALIZED" 4941 constraints.append( 4942 self.expression( 4943 exp.ComputedColumnConstraint, 4944 this=self._parse_assignment(), 4945 persisted=persisted or self._match_text_seq("PERSISTED"), 4946 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4947 ) 4948 ) 4949 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4950 self._match(TokenType.ALIAS) 4951 constraints.append( 4952 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4953 ) 4954 4955 while True: 4956 constraint = self._parse_column_constraint() 4957 if not constraint: 4958 break 4959 constraints.append(constraint) 4960 4961 if not kind and not constraints: 4962 return this 4963 4964 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4965 4966 def _parse_auto_increment( 4967 self, 4968 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4969 start = None 4970 increment = None 4971 4972 if self._match(TokenType.L_PAREN, advance=False): 4973 args = self._parse_wrapped_csv(self._parse_bitwise) 4974 start = seq_get(args, 0) 4975 increment = seq_get(args, 1) 4976 elif self._match_text_seq("START"): 4977 start = self._parse_bitwise() 4978 self._match_text_seq("INCREMENT") 4979 increment = self._parse_bitwise() 4980 4981 if start and increment: 4982 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4983 4984 return exp.AutoIncrementColumnConstraint() 4985 4986 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4987 if not self._match_text_seq("REFRESH"): 4988 self._retreat(self._index - 1) 4989 return None 4990 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4991 4992 def _parse_compress(self) -> exp.CompressColumnConstraint: 4993 if self._match(TokenType.L_PAREN, advance=False): 4994 return self.expression( 4995 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4996 ) 4997 4998 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4999 5000 def _parse_generated_as_identity( 5001 self, 5002 ) -> ( 5003 exp.GeneratedAsIdentityColumnConstraint 5004 | exp.ComputedColumnConstraint 5005 | exp.GeneratedAsRowColumnConstraint 5006 ): 5007 if self._match_text_seq("BY", "DEFAULT"): 5008 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5009 this = self.expression( 5010 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5011 ) 5012 else: 5013 self._match_text_seq("ALWAYS") 5014 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5015 5016 self._match(TokenType.ALIAS) 5017 5018 if self._match_text_seq("ROW"): 5019 start = self._match_text_seq("START") 5020 if not start: 5021 self._match(TokenType.END) 5022 hidden = self._match_text_seq("HIDDEN") 5023 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5024 5025 identity = self._match_text_seq("IDENTITY") 5026 5027 if self._match(TokenType.L_PAREN): 5028 if self._match(TokenType.START_WITH): 5029 this.set("start", self._parse_bitwise()) 5030 if self._match_text_seq("INCREMENT", "BY"): 5031 this.set("increment", self._parse_bitwise()) 5032 if self._match_text_seq("MINVALUE"): 5033 this.set("minvalue", self._parse_bitwise()) 5034 if self._match_text_seq("MAXVALUE"): 5035 this.set("maxvalue", self._parse_bitwise()) 5036 5037 if self._match_text_seq("CYCLE"): 5038 this.set("cycle", True) 5039 elif self._match_text_seq("NO", "CYCLE"): 5040 this.set("cycle", False) 5041 5042 if not identity: 5043 this.set("expression", self._parse_range()) 5044 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5045 args = self._parse_csv(self._parse_bitwise) 5046 this.set("start", seq_get(args, 0)) 5047 this.set("increment", seq_get(args, 1)) 5048 5049 self._match_r_paren() 5050 5051 return this 5052 5053 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5054 self._match_text_seq("LENGTH") 5055 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5056 5057 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5058 if self._match_text_seq("NULL"): 5059 return self.expression(exp.NotNullColumnConstraint) 5060 if self._match_text_seq("CASESPECIFIC"): 5061 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5062 if self._match_text_seq("FOR", "REPLICATION"): 5063 return self.expression(exp.NotForReplicationColumnConstraint) 5064 return None 5065 5066 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5067 if self._match(TokenType.CONSTRAINT): 5068 this = self._parse_id_var() 5069 else: 5070 this = None 5071 5072 if self._match_texts(self.CONSTRAINT_PARSERS): 5073 return self.expression( 5074 exp.ColumnConstraint, 5075 this=this, 5076 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5077 ) 5078 5079 return this 5080 5081 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5082 if not self._match(TokenType.CONSTRAINT): 5083 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5084 5085 return self.expression( 5086 exp.Constraint, 5087 this=self._parse_id_var(), 5088 expressions=self._parse_unnamed_constraints(), 5089 ) 5090 5091 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5092 constraints = [] 5093 while True: 5094 constraint = self._parse_unnamed_constraint() or self._parse_function() 5095 if not constraint: 5096 break 5097 constraints.append(constraint) 5098 5099 return constraints 5100 5101 def _parse_unnamed_constraint( 5102 self, constraints: t.Optional[t.Collection[str]] = None 5103 ) -> t.Optional[exp.Expression]: 5104 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5105 constraints or self.CONSTRAINT_PARSERS 5106 ): 5107 return None 5108 5109 constraint = self._prev.text.upper() 5110 if constraint not in self.CONSTRAINT_PARSERS: 5111 
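# Reached when a caller-supplied `constraints` text has no registered parser;
# failing loudly here beats silently mis-parsing the constraint body.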
self.raise_error(f"No parser found for schema constraint {constraint}.") 5112 5113 return self.CONSTRAINT_PARSERS[constraint](self) 5114 5115 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5116 self._match_text_seq("KEY") 5117 return self.expression( 5118 exp.UniqueColumnConstraint, 5119 this=self._parse_schema(self._parse_id_var(any_token=False)), 5120 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5121 on_conflict=self._parse_on_conflict(), 5122 ) 5123 5124 def _parse_key_constraint_options(self) -> t.List[str]: 5125 options = [] 5126 while True: 5127 if not self._curr: 5128 break 5129 5130 if self._match(TokenType.ON): 5131 action = None 5132 on = self._advance_any() and self._prev.text 5133 5134 if self._match_text_seq("NO", "ACTION"): 5135 action = "NO ACTION" 5136 elif self._match_text_seq("CASCADE"): 5137 action = "CASCADE" 5138 elif self._match_text_seq("RESTRICT"): 5139 action = "RESTRICT" 5140 elif self._match_pair(TokenType.SET, TokenType.NULL): 5141 action = "SET NULL" 5142 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5143 action = "SET DEFAULT" 5144 else: 5145 self.raise_error("Invalid key constraint") 5146 5147 options.append(f"ON {on} {action}") 5148 elif self._match_text_seq("NOT", "ENFORCED"): 5149 options.append("NOT ENFORCED") 5150 elif self._match_text_seq("DEFERRABLE"): 5151 options.append("DEFERRABLE") 5152 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5153 options.append("INITIALLY DEFERRED") 5154 elif self._match_text_seq("NORELY"): 5155 options.append("NORELY") 5156 elif self._match_text_seq("MATCH", "FULL"): 5157 options.append("MATCH FULL") 5158 else: 5159 break 5160 5161 return options 5162 5163 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5164 if match and not self._match(TokenType.REFERENCES): 5165 return None 5166 5167 expressions = None 5168 this = self._parse_table(schema=True) 5169 options = self._parse_key_constraint_options() 5170 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5171 5172 def _parse_foreign_key(self) -> exp.ForeignKey: 5173 expressions = self._parse_wrapped_id_vars() 5174 reference = self._parse_references() 5175 options = {} 5176 5177 while self._match(TokenType.ON): 5178 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5179 self.raise_error("Expected DELETE or UPDATE") 5180 5181 kind = self._prev.text.lower() 5182 5183 if self._match_text_seq("NO", "ACTION"): 5184 action = "NO ACTION" 5185 elif self._match(TokenType.SET): 5186 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5187 action = "SET " + self._prev.text.upper() 5188 else: 5189 self._advance() 5190 action = self._prev.text.upper() 5191 5192 options[kind] = action 5193 5194 return self.expression( 5195 exp.ForeignKey, 5196 expressions=expressions, 5197 reference=reference, 5198 **options, # type: ignore 5199 ) 5200 5201 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5202 return self._parse_field() 5203 5204 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5205 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5206 self._retreat(self._index - 1) 5207 return None 5208 5209 id_vars = self._parse_wrapped_id_vars() 5210 return self.expression( 5211 exp.PeriodForSystemTimeConstraint, 5212 this=seq_get(id_vars, 0), 5213 expression=seq_get(id_vars, 1), 5214 ) 5215 5216 def _parse_primary_key( 5217 self, wrapped_optional: bool = False, in_props: bool = False 5218 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5219 desc = ( 5220 self._match_set((TokenType.ASC, TokenType.DESC)) 5221 and self._prev.token_type == TokenType.DESC 5222 ) 5223 5224 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5225 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5226 5227 expressions = self._parse_wrapped_csv( 5228 self._parse_primary_key_part, optional=wrapped_optional 5229 ) 5230 options = self._parse_key_constraint_options() 5231 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5232 5233 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5234 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5235 5236 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5237 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5238 return this 5239 5240 bracket_kind = self._prev.token_type 5241 expressions = self._parse_csv( 5242 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5243 ) 5244 5245 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5246 self.raise_error("Expected ]") 5247 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5248 self.raise_error("Expected }") 5249 5250 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5251 if bracket_kind == TokenType.L_BRACE: 5252 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5253 elif not this: 5254 this = self.expression(exp.Array, expressions=expressions) 5255 else: 5256 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5257 if constructor_type: 5258 return self.expression(constructor_type, expressions=expressions) 5259 5260 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5261 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5262 5263 self._add_comments(this) 5264 return self._parse_bracket(this) 5265 5266 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5267 if self._match(TokenType.COLON): 5268 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5269 return this 5270 5271 def _parse_case(self) -> t.Optional[exp.Expression]: 5272 ifs = [] 5273 default = None 5274 5275 comments = self._prev_comments 5276 expression = self._parse_assignment() 5277 5278 while self._match(TokenType.WHEN): 5279 this = self._parse_assignment() 5280 self._match(TokenType.THEN) 5281 then = self._parse_assignment() 5282 ifs.append(self.expression(exp.If, this=this, true=then)) 5283 5284 if self._match(TokenType.ELSE): 5285 default = self._parse_assignment() 5286 5287 if not self._match(TokenType.END): 5288 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5289 default = exp.column("interval") 5290 else: 5291 self.raise_error("Expected END after CASE", self._prev) 5292 5293 return self.expression( 5294 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5295 ) 5296 5297 def _parse_if(self) -> t.Optional[exp.Expression]: 5298 if self._match(TokenType.L_PAREN): 5299 args = self._parse_csv(self._parse_assignment) 5300 this = self.validate_expression(exp.If.from_arg_list(args), args) 5301 self._match_r_paren() 5302 else: 5303 index = self._index - 1 5304 5305 if self.NO_PAREN_IF_COMMANDS and index == 0: 5306 return 
self._parse_as_command(self._prev) 5307 5308 condition = self._parse_assignment() 5309 5310 if not condition: 5311 self._retreat(index) 5312 return None 5313 5314 self._match(TokenType.THEN) 5315 true = self._parse_assignment() 5316 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5317 self._match(TokenType.END) 5318 this = self.expression(exp.If, this=condition, true=true, false=false) 5319 5320 return this 5321 5322 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5323 if not self._match_text_seq("VALUE", "FOR"): 5324 self._retreat(self._index - 1) 5325 return None 5326 5327 return self.expression( 5328 exp.NextValueFor, 5329 this=self._parse_column(), 5330 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5331 ) 5332 5333 def _parse_extract(self) -> exp.Extract: 5334 this = self._parse_function() or self._parse_var() or self._parse_type() 5335 5336 if self._match(TokenType.FROM): 5337 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5338 5339 if not self._match(TokenType.COMMA): 5340 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5341 5342 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5343 5344 def _parse_gap_fill(self) -> exp.GapFill: 5345 self._match(TokenType.TABLE) 5346 this = self._parse_table() 5347 5348 self._match(TokenType.COMMA) 5349 args = [this, *self._parse_csv(self._parse_lambda)] 5350 5351 gap_fill = exp.GapFill.from_arg_list(args) 5352 return self.validate_expression(gap_fill, args) 5353 5354 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5355 this = self._parse_assignment() 5356 5357 if not self._match(TokenType.ALIAS): 5358 if self._match(TokenType.COMMA): 5359 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5360 5361 self.raise_error("Expected AS after CAST") 5362 5363 fmt = None 5364 to = self._parse_types() 5365 5366 if self._match(TokenType.FORMAT): 5367 fmt_string = self._parse_string() 5368 fmt = self._parse_at_time_zone(fmt_string) 5369 5370 if not to: 5371 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5372 if to.this in exp.DataType.TEMPORAL_TYPES: 5373 this = self.expression( 5374 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5375 this=this, 5376 format=exp.Literal.string( 5377 format_time( 5378 fmt_string.this if fmt_string else "", 5379 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5380 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5381 ) 5382 ), 5383 ) 5384 5385 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5386 this.set("zone", fmt.args["zone"]) 5387 return this 5388 elif not to: 5389 self.raise_error("Expected TYPE after CAST") 5390 elif isinstance(to, exp.Identifier): 5391 to = exp.DataType.build(to.name, udt=True) 5392 elif to.this == exp.DataType.Type.CHAR: 5393 if self._match(TokenType.CHARACTER_SET): 5394 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5395 5396 return self.expression( 5397 exp.Cast if strict else exp.TryCast, 5398 this=this, 5399 to=to, 5400 format=fmt, 5401 safe=safe, 5402 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5403 ) 5404 5405 def _parse_string_agg(self) -> exp.Expression: 5406 if self._match(TokenType.DISTINCT): 5407 args: t.List[t.Optional[exp.Expression]] = [ 5408 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5409 ] 5410 if 
self._match(TokenType.COMMA): 5411 args.extend(self._parse_csv(self._parse_assignment)) 5412 else: 5413 args = self._parse_csv(self._parse_assignment) # type: ignore 5414 5415 index = self._index 5416 if not self._match(TokenType.R_PAREN) and args: 5417 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5418 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5419 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5420 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5421 5422 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5423 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5424 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5425 if not self._match_text_seq("WITHIN", "GROUP"): 5426 self._retreat(index) 5427 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5428 5429 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5430 order = self._parse_order(this=seq_get(args, 0)) 5431 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5432 5433 def _parse_convert( 5434 self, strict: bool, safe: t.Optional[bool] = None 5435 ) -> t.Optional[exp.Expression]: 5436 this = self._parse_bitwise() 5437 5438 if self._match(TokenType.USING): 5439 to: t.Optional[exp.Expression] = self.expression( 5440 exp.CharacterSet, this=self._parse_var() 5441 ) 5442 elif self._match(TokenType.COMMA): 5443 to = self._parse_types() 5444 else: 5445 to = None 5446 5447 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5448 5449 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5450 """ 5451 There are generally two variants of the DECODE function: 5452 5453 - DECODE(bin, charset) 5454 - DECODE(expression, search, result [, search, result] ... [, default]) 5455 5456 The second variant will always be parsed into a CASE expression. Note that NULL 5457 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5458 instead of relying on pattern matching. 
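For example, DECODE(x, 1, 'one', 2, 'two', 'other') is parsed as the equivalent
of CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END, with the
trailing argument becoming the default because the search/result list has odd
length.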
5459 """ 5460 args = self._parse_csv(self._parse_assignment) 5461 5462 if len(args) < 3: 5463 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5464 5465 expression, *expressions = args 5466 if not expression: 5467 return None 5468 5469 ifs = [] 5470 for search, result in zip(expressions[::2], expressions[1::2]): 5471 if not search or not result: 5472 return None 5473 5474 if isinstance(search, exp.Literal): 5475 ifs.append( 5476 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5477 ) 5478 elif isinstance(search, exp.Null): 5479 ifs.append( 5480 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5481 ) 5482 else: 5483 cond = exp.or_( 5484 exp.EQ(this=expression.copy(), expression=search), 5485 exp.and_( 5486 exp.Is(this=expression.copy(), expression=exp.Null()), 5487 exp.Is(this=search.copy(), expression=exp.Null()), 5488 copy=False, 5489 ), 5490 copy=False, 5491 ) 5492 ifs.append(exp.If(this=cond, true=result)) 5493 5494 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5495 5496 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5497 self._match_text_seq("KEY") 5498 key = self._parse_column() 5499 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5500 self._match_text_seq("VALUE") 5501 value = self._parse_bitwise() 5502 5503 if not key and not value: 5504 return None 5505 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5506 5507 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5508 if not this or not self._match_text_seq("FORMAT", "JSON"): 5509 return this 5510 5511 return self.expression(exp.FormatJson, this=this) 5512 5513 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5514 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5515 for value in values: 5516 if self._match_text_seq(value, "ON", on): 5517 return f"{value} ON {on}" 5518 5519 return None 5520 5521 @t.overload 5522 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5523 5524 @t.overload 5525 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5526 5527 def _parse_json_object(self, agg=False): 5528 star = self._parse_star() 5529 expressions = ( 5530 [star] 5531 if star 5532 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5533 ) 5534 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5535 5536 unique_keys = None 5537 if self._match_text_seq("WITH", "UNIQUE"): 5538 unique_keys = True 5539 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5540 unique_keys = False 5541 5542 self._match_text_seq("KEYS") 5543 5544 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5545 self._parse_type() 5546 ) 5547 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5548 5549 return self.expression( 5550 exp.JSONObjectAgg if agg else exp.JSONObject, 5551 expressions=expressions, 5552 null_handling=null_handling, 5553 unique_keys=unique_keys, 5554 return_type=return_type, 5555 encoding=encoding, 5556 ) 5557 5558 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5559 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5560 if not self._match_text_seq("NESTED"): 5561 this = self._parse_id_var() 5562 kind = self._parse_types(allow_identifiers=False) 5563 nested = None 5564 else: 5565 this = None 5566 kind = None 5567 nested = True 5568 5569 path = self._match_text_seq("PATH") and self._parse_string() 5570 nested_schema = nested and self._parse_json_schema() 5571 5572 return self.expression( 5573 exp.JSONColumnDef, 5574 this=this, 5575 kind=kind, 5576 path=path, 5577 nested_schema=nested_schema, 5578 ) 5579 5580 def _parse_json_schema(self) -> exp.JSONSchema: 5581 self._match_text_seq("COLUMNS") 5582 return self.expression( 5583 exp.JSONSchema, 5584 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5585 ) 5586 5587 def _parse_json_table(self) -> exp.JSONTable: 5588 this = self._parse_format_json(self._parse_bitwise()) 5589 path = self._match(TokenType.COMMA) and self._parse_string() 5590 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5591 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5592 schema = self._parse_json_schema() 5593 5594 return exp.JSONTable( 5595 this=this, 5596 schema=schema, 5597 path=path, 5598 error_handling=error_handling, 5599 empty_handling=empty_handling, 5600 ) 5601 5602 def _parse_match_against(self) -> exp.MatchAgainst: 5603 expressions = self._parse_csv(self._parse_column) 5604 5605 self._match_text_seq(")", "AGAINST", "(") 5606 5607 this = self._parse_string() 5608 5609 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5610 modifier = "IN NATURAL LANGUAGE MODE" 5611 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5612 modifier = f"{modifier} WITH QUERY EXPANSION" 5613 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5614 modifier = "IN BOOLEAN MODE" 5615 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5616 modifier = "WITH QUERY EXPANSION" 5617 else: 5618 modifier = None 5619 5620 return self.expression( 5621 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5622 ) 5623 5624 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5625 def _parse_open_json(self) -> exp.OpenJSON: 5626 this = self._parse_bitwise() 5627 path = self._match(TokenType.COMMA) and self._parse_string() 5628 5629 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5630 this = self._parse_field(any_token=True) 5631 kind = self._parse_types() 5632 path = 
self._parse_string() 5633 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5634 5635 return self.expression( 5636 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5637 ) 5638 5639 expressions = None 5640 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5641 self._match_l_paren() 5642 expressions = self._parse_csv(_parse_open_json_column_def) 5643 5644 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5645 5646 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5647 args = self._parse_csv(self._parse_bitwise) 5648 5649 if self._match(TokenType.IN): 5650 return self.expression( 5651 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5652 ) 5653 5654 if haystack_first: 5655 haystack = seq_get(args, 0) 5656 needle = seq_get(args, 1) 5657 else: 5658 needle = seq_get(args, 0) 5659 haystack = seq_get(args, 1) 5660 5661 return self.expression( 5662 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5663 ) 5664 5665 def _parse_predict(self) -> exp.Predict: 5666 self._match_text_seq("MODEL") 5667 this = self._parse_table() 5668 5669 self._match(TokenType.COMMA) 5670 self._match_text_seq("TABLE") 5671 5672 return self.expression( 5673 exp.Predict, 5674 this=this, 5675 expression=self._parse_table(), 5676 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5677 ) 5678 5679 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5680 args = self._parse_csv(self._parse_table) 5681 return exp.JoinHint(this=func_name.upper(), expressions=args) 5682 5683 def _parse_substring(self) -> exp.Substring: 5684 # Postgres supports the form: substring(string [from int] [for int]) 5685 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5686 5687 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5688 5689 if self._match(TokenType.FROM): 5690 args.append(self._parse_bitwise()) 5691 if self._match(TokenType.FOR): 5692 if len(args) == 1: 5693 args.append(exp.Literal.number(1)) 5694 args.append(self._parse_bitwise()) 5695 5696 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5697 5698 def _parse_trim(self) -> exp.Trim: 5699 # https://www.w3resource.com/sql/character-functions/trim.php 5700 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5701 5702 position = None 5703 collation = None 5704 expression = None 5705 5706 if self._match_texts(self.TRIM_TYPES): 5707 position = self._prev.text.upper() 5708 5709 this = self._parse_bitwise() 5710 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5711 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5712 expression = self._parse_bitwise() 5713 5714 if invert_order: 5715 this, expression = expression, this 5716 5717 if self._match(TokenType.COLLATE): 5718 collation = self._parse_bitwise() 5719 5720 return self.expression( 5721 exp.Trim, this=this, position=position, expression=expression, collation=collation 5722 ) 5723 5724 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5725 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5726 5727 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5728 return self._parse_window(self._parse_id_var(), alias=True) 5729 5730 def _parse_respect_or_ignore_nulls( 5731 self, this: t.Optional[exp.Expression] 5732 ) -> t.Optional[exp.Expression]: 5733 if self._match_text_seq("IGNORE", "NULLS"): 
5734 return self.expression(exp.IgnoreNulls, this=this) 5735 if self._match_text_seq("RESPECT", "NULLS"): 5736 return self.expression(exp.RespectNulls, this=this) 5737 return this 5738 5739 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5740 if self._match(TokenType.HAVING): 5741 self._match_texts(("MAX", "MIN")) 5742 max = self._prev.text.upper() != "MIN" 5743 return self.expression( 5744 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5745 ) 5746 5747 return this 5748 5749 def _parse_window( 5750 self, this: t.Optional[exp.Expression], alias: bool = False 5751 ) -> t.Optional[exp.Expression]: 5752 func = this 5753 comments = func.comments if isinstance(func, exp.Expression) else None 5754 5755 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5756 self._match(TokenType.WHERE) 5757 this = self.expression( 5758 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5759 ) 5760 self._match_r_paren() 5761 5762 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5763 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5764 if self._match_text_seq("WITHIN", "GROUP"): 5765 order = self._parse_wrapped(self._parse_order) 5766 this = self.expression(exp.WithinGroup, this=this, expression=order) 5767 5768 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5769 # Some dialects choose to implement and some do not. 5770 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5771 5772 # There is some code above in _parse_lambda that handles 5773 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5774 5775 # The below changes handle 5776 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5777 5778 # Oracle allows both formats 5779 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5780 # and Snowflake chose to do the same for familiarity 5781 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5782 if isinstance(this, exp.AggFunc): 5783 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5784 5785 if ignore_respect and ignore_respect is not this: 5786 ignore_respect.replace(ignore_respect.this) 5787 this = self.expression(ignore_respect.__class__, this=this) 5788 5789 this = self._parse_respect_or_ignore_nulls(this) 5790 5791 # bigquery select from window x AS (partition by ...) 
5792 if alias: 5793 over = None 5794 self._match(TokenType.ALIAS) 5795 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5796 return this 5797 else: 5798 over = self._prev.text.upper() 5799 5800 if comments and isinstance(func, exp.Expression): 5801 func.pop_comments() 5802 5803 if not self._match(TokenType.L_PAREN): 5804 return self.expression( 5805 exp.Window, 5806 comments=comments, 5807 this=this, 5808 alias=self._parse_id_var(False), 5809 over=over, 5810 ) 5811 5812 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5813 5814 first = self._match(TokenType.FIRST) 5815 if self._match_text_seq("LAST"): 5816 first = False 5817 5818 partition, order = self._parse_partition_and_order() 5819 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5820 5821 if kind: 5822 self._match(TokenType.BETWEEN) 5823 start = self._parse_window_spec() 5824 self._match(TokenType.AND) 5825 end = self._parse_window_spec() 5826 5827 spec = self.expression( 5828 exp.WindowSpec, 5829 kind=kind, 5830 start=start["value"], 5831 start_side=start["side"], 5832 end=end["value"], 5833 end_side=end["side"], 5834 ) 5835 else: 5836 spec = None 5837 5838 self._match_r_paren() 5839 5840 window = self.expression( 5841 exp.Window, 5842 comments=comments, 5843 this=this, 5844 partition_by=partition, 5845 order=order, 5846 spec=spec, 5847 alias=window_alias, 5848 over=over, 5849 first=first, 5850 ) 5851 5852 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5853 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5854 return self._parse_window(window, alias=alias) 5855 5856 return window 5857 5858 def _parse_partition_and_order( 5859 self, 5860 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5861 return self._parse_partition_by(), self._parse_order() 5862 5863 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5864 self._match(TokenType.BETWEEN) 5865 5866 return { 5867 "value": ( 5868 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5869 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5870 or self._parse_bitwise() 5871 ), 5872 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5873 } 5874 5875 def _parse_alias( 5876 self, this: t.Optional[exp.Expression], explicit: bool = False 5877 ) -> t.Optional[exp.Expression]: 5878 any_token = self._match(TokenType.ALIAS) 5879 comments = self._prev_comments or [] 5880 5881 if explicit and not any_token: 5882 return this 5883 5884 if self._match(TokenType.L_PAREN): 5885 aliases = self.expression( 5886 exp.Aliases, 5887 comments=comments, 5888 this=this, 5889 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5890 ) 5891 self._match_r_paren(aliases) 5892 return aliases 5893 5894 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5895 self.STRING_ALIASES and self._parse_string_as_identifier() 5896 ) 5897 5898 if alias: 5899 comments.extend(alias.pop_comments()) 5900 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5901 column = this.this 5902 5903 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5904 if not this.comments and column and column.comments: 5905 this.comments = column.pop_comments() 5906 5907 return this 5908 5909 def _parse_id_var( 5910 self, 5911 any_token: bool = True, 5912 tokens: t.Optional[t.Collection[TokenType]] = None, 5913 ) -> t.Optional[exp.Expression]: 5914 expression = self._parse_identifier() 5915 if 
not expression and ( 5916 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5917 ): 5918 quoted = self._prev.token_type == TokenType.STRING 5919 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5920 5921 return expression 5922 5923 def _parse_string(self) -> t.Optional[exp.Expression]: 5924 if self._match_set(self.STRING_PARSERS): 5925 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5926 return self._parse_placeholder() 5927 5928 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5929 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5930 5931 def _parse_number(self) -> t.Optional[exp.Expression]: 5932 if self._match_set(self.NUMERIC_PARSERS): 5933 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5934 return self._parse_placeholder() 5935 5936 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5937 if self._match(TokenType.IDENTIFIER): 5938 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5939 return self._parse_placeholder() 5940 5941 def _parse_var( 5942 self, 5943 any_token: bool = False, 5944 tokens: t.Optional[t.Collection[TokenType]] = None, 5945 upper: bool = False, 5946 ) -> t.Optional[exp.Expression]: 5947 if ( 5948 (any_token and self._advance_any()) 5949 or self._match(TokenType.VAR) 5950 or (self._match_set(tokens) if tokens else False) 5951 ): 5952 return self.expression( 5953 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5954 ) 5955 return self._parse_placeholder() 5956 5957 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5958 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5959 self._advance() 5960 return self._prev 5961 return None 5962 5963 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5964 return self._parse_var() or self._parse_string() 5965 5966 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5967 return self._parse_primary() or self._parse_var(any_token=True) 5968 5969 def _parse_null(self) -> t.Optional[exp.Expression]: 5970 if self._match_set(self.NULL_TOKENS): 5971 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5972 return self._parse_placeholder() 5973 5974 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5975 if self._match(TokenType.TRUE): 5976 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5977 if self._match(TokenType.FALSE): 5978 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5979 return self._parse_placeholder() 5980 5981 def _parse_star(self) -> t.Optional[exp.Expression]: 5982 if self._match(TokenType.STAR): 5983 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5984 return self._parse_placeholder() 5985 5986 def _parse_parameter(self) -> exp.Parameter: 5987 this = self._parse_identifier() or self._parse_primary_or_var() 5988 return self.expression(exp.Parameter, this=this) 5989 5990 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5991 if self._match_set(self.PLACEHOLDER_PARSERS): 5992 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5993 if placeholder: 5994 return placeholder 5995 self._advance(-1) 5996 return None 5997 5998 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5999 if not self._match_texts(keywords): 6000 return None 6001 if self._match(TokenType.L_PAREN, advance=False): 6002 return 
self._parse_wrapped_csv(self._parse_expression) 6003 6004 expression = self._parse_expression() 6005 return [expression] if expression else None 6006 6007 def _parse_csv( 6008 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6009 ) -> t.List[exp.Expression]: 6010 parse_result = parse_method() 6011 items = [parse_result] if parse_result is not None else [] 6012 6013 while self._match(sep): 6014 self._add_comments(parse_result) 6015 parse_result = parse_method() 6016 if parse_result is not None: 6017 items.append(parse_result) 6018 6019 return items 6020 6021 def _parse_tokens( 6022 self, parse_method: t.Callable, expressions: t.Dict 6023 ) -> t.Optional[exp.Expression]: 6024 this = parse_method() 6025 6026 while self._match_set(expressions): 6027 this = self.expression( 6028 expressions[self._prev.token_type], 6029 this=this, 6030 comments=self._prev_comments, 6031 expression=parse_method(), 6032 ) 6033 6034 return this 6035 6036 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6037 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6038 6039 def _parse_wrapped_csv( 6040 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6041 ) -> t.List[exp.Expression]: 6042 return self._parse_wrapped( 6043 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6044 ) 6045 6046 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6047 wrapped = self._match(TokenType.L_PAREN) 6048 if not wrapped and not optional: 6049 self.raise_error("Expecting (") 6050 parse_result = parse_method() 6051 if wrapped: 6052 self._match_r_paren() 6053 return parse_result 6054 6055 def _parse_expressions(self) -> t.List[exp.Expression]: 6056 return self._parse_csv(self._parse_expression) 6057 6058 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6059 return self._parse_select() or self._parse_set_operations( 6060 self._parse_expression() if alias else self._parse_assignment() 6061 ) 6062 6063 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6064 return self._parse_query_modifiers( 6065 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6066 ) 6067 6068 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6069 this = None 6070 if self._match_texts(self.TRANSACTION_KIND): 6071 this = self._prev.text 6072 6073 self._match_texts(("TRANSACTION", "WORK")) 6074 6075 modes = [] 6076 while True: 6077 mode = [] 6078 while self._match(TokenType.VAR): 6079 mode.append(self._prev.text) 6080 6081 if mode: 6082 modes.append(" ".join(mode)) 6083 if not self._match(TokenType.COMMA): 6084 break 6085 6086 return self.expression(exp.Transaction, this=this, modes=modes) 6087 6088 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6089 chain = None 6090 savepoint = None 6091 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6092 6093 self._match_texts(("TRANSACTION", "WORK")) 6094 6095 if self._match_text_seq("TO"): 6096 self._match_text_seq("SAVEPOINT") 6097 savepoint = self._parse_id_var() 6098 6099 if self._match(TokenType.AND): 6100 chain = not self._match_text_seq("NO") 6101 self._match_text_seq("CHAIN") 6102 6103 if is_rollback: 6104 return self.expression(exp.Rollback, savepoint=savepoint) 6105 6106 return self.expression(exp.Commit, chain=chain) 6107 6108 def _parse_refresh(self) -> exp.Refresh: 6109 self._match(TokenType.TABLE) 6110 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6111 6112 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6113 if not self._match_text_seq("ADD"): 6114 return None 6115 6116 self._match(TokenType.COLUMN) 6117 exists_column = self._parse_exists(not_=True) 6118 expression = self._parse_field_def() 6119 6120 if expression: 6121 expression.set("exists", exists_column) 6122 6123 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6124 if self._match_texts(("FIRST", "AFTER")): 6125 position = self._prev.text 6126 column_position = self.expression( 6127 exp.ColumnPosition, this=self._parse_column(), position=position 6128 ) 6129 expression.set("position", column_position) 6130 6131 return expression 6132 6133 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6134 drop = self._match(TokenType.DROP) and self._parse_drop() 6135 if drop and not isinstance(drop, exp.Command): 6136 drop.set("kind", drop.args.get("kind", "COLUMN")) 6137 return drop 6138 6139 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6140 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6141 return self.expression( 6142 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6143 ) 6144 6145 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6146 index = self._index - 1 6147 6148 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6149 return self._parse_csv( 6150 lambda: self.expression( 6151 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6152 ) 6153 ) 6154 6155 self._retreat(index) 6156 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6157 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6158 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6159 6160 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6161 if self._match_texts(self.ALTER_ALTER_PARSERS): 6162 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6163 6164 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6165 # keyword after ALTER we default to parsing this statement 6166 self._match(TokenType.COLUMN) 6167 column = self._parse_field(any_token=True) 6168 6169 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6170 return self.expression(exp.AlterColumn, this=column, drop=True) 6171 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6172 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6173 if self._match(TokenType.COMMENT): 6174 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6175 if self._match_text_seq("DROP", "NOT", "NULL"): 6176 return self.expression( 6177 exp.AlterColumn, 6178 this=column, 6179 drop=True, 6180 allow_null=True, 6181 ) 6182 if self._match_text_seq("SET", "NOT", "NULL"): 6183 return self.expression( 6184 exp.AlterColumn, 6185 this=column, 6186 allow_null=False, 6187 ) 6188 self._match_text_seq("SET", "DATA") 6189 self._match_text_seq("TYPE") 6190 return self.expression( 6191 exp.AlterColumn, 6192 this=column, 6193 dtype=self._parse_types(), 6194 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6195 using=self._match(TokenType.USING) and self._parse_assignment(), 6196 ) 6197 6198 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6199 if self._match_texts(("ALL", "EVEN", "AUTO")): 6200 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6201 6202 self._match_text_seq("KEY", "DISTKEY") 6203 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6204 6205 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6206 if compound: 6207 self._match_text_seq("SORTKEY") 6208 6209 if self._match(TokenType.L_PAREN, advance=False): 6210 return self.expression( 6211 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6212 ) 6213 6214 self._match_texts(("AUTO", "NONE")) 6215 return self.expression( 6216 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6217 ) 6218 6219 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6220 index = self._index - 1 6221 6222 partition_exists = self._parse_exists() 6223 if self._match(TokenType.PARTITION, advance=False): 6224 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6225 6226 self._retreat(index) 6227 return self._parse_csv(self._parse_drop_column) 6228 6229 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6230 if self._match(TokenType.COLUMN): 6231 exists = self._parse_exists() 6232 old_column = self._parse_column() 6233 to = self._match_text_seq("TO") 6234 new_column = self._parse_column() 6235 6236 if old_column is None or to is None or new_column is None: 6237 return None 6238 6239 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6240 6241 self._match_text_seq("TO") 6242 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6243 6244 def _parse_alter_table_set(self) -> exp.AlterSet: 6245 alter_set = self.expression(exp.AlterSet) 6246 6247 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6248 "TABLE", "PROPERTIES" 6249 ): 6250 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6251 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6252 alter_set.set("expressions", [self._parse_assignment()]) 6253 elif self._match_texts(("LOGGED", "UNLOGGED")): 6254 alter_set.set("option", exp.var(self._prev.text.upper())) 6255 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6256 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6257 elif self._match_text_seq("LOCATION"): 6258 alter_set.set("location", self._parse_field()) 6259 elif self._match_text_seq("ACCESS", "METHOD"): 6260 alter_set.set("access_method", self._parse_field()) 6261 elif self._match_text_seq("TABLESPACE"): 6262 alter_set.set("tablespace", self._parse_field()) 6263 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6264 alter_set.set("file_format", [self._parse_field()]) 6265 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6266 alter_set.set("file_format", self._parse_wrapped_options()) 6267 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6268 alter_set.set("copy_options", self._parse_wrapped_options()) 6269 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6270 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6271 else: 6272 if self._match_text_seq("SERDE"): 6273 alter_set.set("serde", self._parse_field()) 6274 6275 alter_set.set("expressions", [self._parse_properties()]) 6276 6277 return alter_set 6278 6279 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6280 start = self._prev 6281 6282 if not self._match(TokenType.TABLE): 6283 return 
self._parse_as_command(start) 6284 6285 exists = self._parse_exists() 6286 only = self._match_text_seq("ONLY") 6287 this = self._parse_table(schema=True) 6288 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6289 6290 if self._next: 6291 self._advance() 6292 6293 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6294 if parser: 6295 actions = ensure_list(parser(self)) 6296 options = self._parse_csv(self._parse_property) 6297 6298 if not self._curr and actions: 6299 return self.expression( 6300 exp.AlterTable, 6301 this=this, 6302 exists=exists, 6303 actions=actions, 6304 only=only, 6305 options=options, 6306 cluster=cluster, 6307 ) 6308 6309 return self._parse_as_command(start) 6310 6311 def _parse_merge(self) -> exp.Merge: 6312 self._match(TokenType.INTO) 6313 target = self._parse_table() 6314 6315 if target and self._match(TokenType.ALIAS, advance=False): 6316 target.set("alias", self._parse_table_alias()) 6317 6318 self._match(TokenType.USING) 6319 using = self._parse_table() 6320 6321 self._match(TokenType.ON) 6322 on = self._parse_assignment() 6323 6324 return self.expression( 6325 exp.Merge, 6326 this=target, 6327 using=using, 6328 on=on, 6329 expressions=self._parse_when_matched(), 6330 ) 6331 6332 def _parse_when_matched(self) -> t.List[exp.When]: 6333 whens = [] 6334 6335 while self._match(TokenType.WHEN): 6336 matched = not self._match(TokenType.NOT) 6337 self._match_text_seq("MATCHED") 6338 source = ( 6339 False 6340 if self._match_text_seq("BY", "TARGET") 6341 else self._match_text_seq("BY", "SOURCE") 6342 ) 6343 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6344 6345 self._match(TokenType.THEN) 6346 6347 if self._match(TokenType.INSERT): 6348 _this = self._parse_star() 6349 if _this: 6350 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6351 else: 6352 then = self.expression( 6353 exp.Insert, 6354 this=self._parse_value(), 6355 expression=self._match_text_seq("VALUES") and self._parse_value(), 6356 ) 6357 elif self._match(TokenType.UPDATE): 6358 expressions = self._parse_star() 6359 if expressions: 6360 then = self.expression(exp.Update, expressions=expressions) 6361 else: 6362 then = self.expression( 6363 exp.Update, 6364 expressions=self._match(TokenType.SET) 6365 and self._parse_csv(self._parse_equality), 6366 ) 6367 elif self._match(TokenType.DELETE): 6368 then = self.expression(exp.Var, this=self._prev.text) 6369 else: 6370 then = None 6371 6372 whens.append( 6373 self.expression( 6374 exp.When, 6375 matched=matched, 6376 source=source, 6377 condition=condition, 6378 then=then, 6379 ) 6380 ) 6381 return whens 6382 6383 def _parse_show(self) -> t.Optional[exp.Expression]: 6384 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6385 if parser: 6386 return parser(self) 6387 return self._parse_as_command(self._prev) 6388 6389 def _parse_set_item_assignment( 6390 self, kind: t.Optional[str] = None 6391 ) -> t.Optional[exp.Expression]: 6392 index = self._index 6393 6394 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6395 return self._parse_set_transaction(global_=kind == "GLOBAL") 6396 6397 left = self._parse_primary() or self._parse_column() 6398 assignment_delimiter = self._match_texts(("=", "TO")) 6399 6400 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6401 self._retreat(index) 6402 return None 6403 6404 right = self._parse_statement() or self._parse_id_var() 6405 if isinstance(right, 
(exp.Column, exp.Identifier)): 6406 right = exp.var(right.name) 6407 6408 this = self.expression(exp.EQ, this=left, expression=right) 6409 return self.expression(exp.SetItem, this=this, kind=kind) 6410 6411 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6412 self._match_text_seq("TRANSACTION") 6413 characteristics = self._parse_csv( 6414 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6415 ) 6416 return self.expression( 6417 exp.SetItem, 6418 expressions=characteristics, 6419 kind="TRANSACTION", 6420 **{"global": global_}, # type: ignore 6421 ) 6422 6423 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6424 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6425 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6426 6427 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6428 index = self._index 6429 set_ = self.expression( 6430 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6431 ) 6432 6433 if self._curr: 6434 self._retreat(index) 6435 return self._parse_as_command(self._prev) 6436 6437 return set_ 6438 6439 def _parse_var_from_options( 6440 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6441 ) -> t.Optional[exp.Var]: 6442 start = self._curr 6443 if not start: 6444 return None 6445 6446 option = start.text.upper() 6447 continuations = options.get(option) 6448 6449 index = self._index 6450 self._advance() 6451 for keywords in continuations or []: 6452 if isinstance(keywords, str): 6453 keywords = (keywords,) 6454 6455 if self._match_text_seq(*keywords): 6456 option = f"{option} {' '.join(keywords)}" 6457 break 6458 else: 6459 if continuations or continuations is None: 6460 if raise_unmatched: 6461 self.raise_error(f"Unknown option {option}") 6462 6463 self._retreat(index) 6464 return None 6465 6466 return exp.var(option) 6467 6468 def _parse_as_command(self, start: Token) -> exp.Command: 6469 while self._curr: 6470 self._advance() 6471 text = self._find_sql(start, self._prev) 6472 size = len(start.text) 6473 self._warn_unsupported() 6474 return exp.Command(this=text[:size], expression=text[size:]) 6475 6476 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6477 settings = [] 6478 6479 self._match_l_paren() 6480 kind = self._parse_id_var() 6481 6482 if self._match(TokenType.L_PAREN): 6483 while True: 6484 key = self._parse_id_var() 6485 value = self._parse_primary() 6486 6487 if not key and value is None: 6488 break 6489 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6490 self._match(TokenType.R_PAREN) 6491 6492 self._match_r_paren() 6493 6494 return self.expression( 6495 exp.DictProperty, 6496 this=this, 6497 kind=kind.this if kind else None, 6498 settings=settings, 6499 ) 6500 6501 def _parse_dict_range(self, this: str) -> exp.DictRange: 6502 self._match_l_paren() 6503 has_min = self._match_text_seq("MIN") 6504 if has_min: 6505 min = self._parse_var() or self._parse_primary() 6506 self._match_text_seq("MAX") 6507 max = self._parse_var() or self._parse_primary() 6508 else: 6509 max = self._parse_var() or self._parse_primary() 6510 min = exp.Literal.number(0) 6511 self._match_r_paren() 6512 return self.expression(exp.DictRange, this=this, min=min, max=max) 6513 6514 def _parse_comprehension( 6515 self, this: t.Optional[exp.Expression] 6516 ) -> t.Optional[exp.Comprehension]: 6517 index = self._index 6518 expression = self._parse_column() 6519 if not 
self._match(TokenType.IN): 6520 self._retreat(index - 1) 6521 return None 6522 iterator = self._parse_column() 6523 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6524 return self.expression( 6525 exp.Comprehension, 6526 this=this, 6527 expression=expression, 6528 iterator=iterator, 6529 condition=condition, 6530 ) 6531 6532 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6533 if self._match(TokenType.HEREDOC_STRING): 6534 return self.expression(exp.Heredoc, this=self._prev.text) 6535 6536 if not self._match_text_seq("$"): 6537 return None 6538 6539 tags = ["$"] 6540 tag_text = None 6541 6542 if self._is_connected(): 6543 self._advance() 6544 tags.append(self._prev.text.upper()) 6545 else: 6546 self.raise_error("No closing $ found") 6547 6548 if tags[-1] != "$": 6549 if self._is_connected() and self._match_text_seq("$"): 6550 tag_text = tags[-1] 6551 tags.append("$") 6552 else: 6553 self.raise_error("No closing $ found") 6554 6555 heredoc_start = self._curr 6556 6557 while self._curr: 6558 if self._match_text_seq(*tags, advance=False): 6559 this = self._find_sql(heredoc_start, self._prev) 6560 self._advance(len(tags)) 6561 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6562 6563 self._advance() 6564 6565 self.raise_error(f"No closing {''.join(tags)} found") 6566 return None 6567 6568 def _find_parser( 6569 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6570 ) -> t.Optional[t.Callable]: 6571 if not self._curr: 6572 return None 6573 6574 index = self._index 6575 this = [] 6576 while True: 6577 # The current token might be multiple words 6578 curr = self._curr.text.upper() 6579 key = curr.split(" ") 6580 this.append(curr) 6581 6582 self._advance() 6583 result, trie = in_trie(trie, key) 6584 if result == TrieResult.FAILED: 6585 break 6586 6587 if result == TrieResult.EXISTS: 6588 subparser = parsers[" ".join(this)] 6589 return subparser 6590 6591 self._retreat(index) 6592 return None 6593 6594 def _match(self, token_type, advance=True, expression=None): 6595 if not self._curr: 6596 return None 6597 6598 if self._curr.token_type == token_type: 6599 if advance: 6600 self._advance() 6601 self._add_comments(expression) 6602 return True 6603 6604 return None 6605 6606 def _match_set(self, types, advance=True): 6607 if not self._curr: 6608 return None 6609 6610 if self._curr.token_type in types: 6611 if advance: 6612 self._advance() 6613 return True 6614 6615 return None 6616 6617 def _match_pair(self, token_type_a, token_type_b, advance=True): 6618 if not self._curr or not self._next: 6619 return None 6620 6621 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6622 if advance: 6623 self._advance(2) 6624 return True 6625 6626 return None 6627 6628 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6629 if not self._match(TokenType.L_PAREN, expression=expression): 6630 self.raise_error("Expecting (") 6631 6632 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6633 if not self._match(TokenType.R_PAREN, expression=expression): 6634 self.raise_error("Expecting )") 6635 6636 def _match_texts(self, texts, advance=True): 6637 if self._curr and self._curr.text.upper() in texts: 6638 if advance: 6639 self._advance() 6640 return True 6641 return None 6642 6643 def _match_text_seq(self, *texts, advance=True): 6644 index = self._index 6645 for text in texts: 6646 if self._curr and self._curr.text.upper() == text: 6647 self._advance() 6648 else: 6649 
self._retreat(index) 6650 return None 6651 6652 if not advance: 6653 self._retreat(index) 6654 6655 return True 6656 6657 def _replace_lambda( 6658 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6659 ) -> t.Optional[exp.Expression]: 6660 if not node: 6661 return node 6662 6663 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6664 6665 for column in node.find_all(exp.Column): 6666 typ = lambda_types.get(column.parts[0].name) 6667 if typ is not None: 6668 dot_or_id = column.to_dot() if column.table else column.this 6669 6670 if typ: 6671 dot_or_id = self.expression( 6672 exp.Cast, 6673 this=dot_or_id, 6674 to=typ, 6675 ) 6676 6677 parent = column.parent 6678 6679 while isinstance(parent, exp.Dot): 6680 if not isinstance(parent.parent, exp.Dot): 6681 parent.replace(dot_or_id) 6682 break 6683 parent = parent.parent 6684 else: 6685 if column is node: 6686 node = dot_or_id 6687 else: 6688 column.replace(dot_or_id) 6689 return node 6690 6691 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6692 start = self._prev 6693 6694 # Not to be confused with TRUNCATE(number, decimals) function call 6695 if self._match(TokenType.L_PAREN): 6696 self._retreat(self._index - 2) 6697 return self._parse_function() 6698 6699 # Clickhouse supports TRUNCATE DATABASE as well 6700 is_database = self._match(TokenType.DATABASE) 6701 6702 self._match(TokenType.TABLE) 6703 6704 exists = self._parse_exists(not_=False) 6705 6706 expressions = self._parse_csv( 6707 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6708 ) 6709 6710 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6711 6712 if self._match_text_seq("RESTART", "IDENTITY"): 6713 identity = "RESTART" 6714 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6715 identity = "CONTINUE" 6716 else: 6717 identity = None 6718 6719 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6720 option = self._prev.text 6721 else: 6722 option = None 6723 6724 partition = self._parse_partition() 6725 6726 # Fallback case 6727 if self._curr: 6728 return self._parse_as_command(start) 6729 6730 return self.expression( 6731 exp.TruncateTable, 6732 expressions=expressions, 6733 is_database=is_database, 6734 exists=exists, 6735 cluster=cluster, 6736 identity=identity, 6737 option=option, 6738 partition=partition, 6739 ) 6740 6741 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6742 this = self._parse_ordered(self._parse_opclass) 6743 6744 if not self._match(TokenType.WITH): 6745 return this 6746 6747 op = self._parse_var(any_token=True) 6748 6749 return self.expression(exp.WithOperator, this=this, op=op) 6750 6751 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6752 self._match(TokenType.EQ) 6753 self._match(TokenType.L_PAREN) 6754 6755 opts: t.List[t.Optional[exp.Expression]] = [] 6756 while self._curr and not self._match(TokenType.R_PAREN): 6757 if self._match_text_seq("FORMAT_NAME", "="): 6758 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6759 # so we parse it separately to use _parse_field() 6760 prop = self.expression( 6761 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6762 ) 6763 opts.append(prop) 6764 else: 6765 opts.append(self._parse_property()) 6766 6767 self._match(TokenType.COMMA) 6768 6769 return opts 6770 6771 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6772 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6773 6774 options = [] 6775 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6776 option = self._parse_var(any_token=True) 6777 prev = self._prev.text.upper() 6778 6779 # Different dialects might separate options and values by white space, "=" and "AS" 6780 self._match(TokenType.EQ) 6781 self._match(TokenType.ALIAS) 6782 6783 param = self.expression(exp.CopyParameter, this=option) 6784 6785 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6786 TokenType.L_PAREN, advance=False 6787 ): 6788 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6789 param.set("expressions", self._parse_wrapped_options()) 6790 elif prev == "FILE_FORMAT": 6791 # T-SQL's external file format case 6792 param.set("expression", self._parse_field()) 6793 else: 6794 param.set("expression", self._parse_unquoted_field()) 6795 6796 options.append(param) 6797 self._match(sep) 6798 6799 return options 6800 6801 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6802 expr = self.expression(exp.Credentials) 6803 6804 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6805 expr.set("storage", self._parse_field()) 6806 if self._match_text_seq("CREDENTIALS"): 6807 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6808 creds = ( 6809 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6810 ) 6811 expr.set("credentials", creds) 6812 if self._match_text_seq("ENCRYPTION"): 6813 expr.set("encryption", self._parse_wrapped_options()) 6814 if self._match_text_seq("IAM_ROLE"): 6815 expr.set("iam_role", self._parse_field()) 6816 if self._match_text_seq("REGION"): 6817 expr.set("region", self._parse_field()) 6818 6819 return expr 6820 6821 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6822 return self._parse_field() 6823 6824 def _parse_copy(self) -> exp.Copy | exp.Command: 6825 start = self._prev 6826 6827 self._match(TokenType.INTO) 6828 6829 this = ( 6830 self._parse_select(nested=True, parse_subquery_alias=False) 6831 if self._match(TokenType.L_PAREN, advance=False) 6832 else self._parse_table(schema=True) 6833 ) 6834 6835 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6836 6837 files = self._parse_csv(self._parse_file_location) 6838 credentials = self._parse_credentials() 6839 6840 self._match_text_seq("WITH") 6841 6842 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6843 6844 # Fallback case 6845 if self._curr: 6846 return self._parse_as_command(start) 6847 6848 return self.expression( 6849 exp.Copy, 6850 this=this, 6851 kind=kind, 6852 credentials=credentials, 6853 files=files, 6854 params=params, 6855 )
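Note the shared fallback in the statement parsers above: _parse_alter, _parse_copy and _parse_truncate_table all bail out to _parse_as_command when unconsumed tokens remain, wrapping the statement in a generic exp.Command that preserves the raw SQL. A minimal sketch of this behavior through the public API, using a made-up ALTER action:

import sqlglot
from sqlglot import exp

# No handler is registered for this invented ALTER action, so the parser
# falls back to exp.Command and keeps the original statement text.
tree = sqlglot.parse_one("ALTER TABLE t FROBNICATE")
print(type(tree).__name__)  # Command (an "unsupported syntax" warning is also logged)
print(tree.sql())           # ALTER TABLE t FROBNICATE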
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1234 def __init__( 1235 self, 1236 error_level: t.Optional[ErrorLevel] = None, 1237 error_message_context: int = 100, 1238 max_errors: int = 3, 1239 dialect: DialectType = None, 1240 ): 1241 from sqlglot.dialects import Dialect 1242 1243 self.error_level = error_level or ErrorLevel.IMMEDIATE 1244 self.error_message_context = error_message_context 1245 self.max_errors = max_errors 1246 self.dialect = Dialect.get_or_raise(dialect) 1247 self.reset()
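A minimal construction sketch; the dialect argument is resolved through Dialect.get_or_raise, so passing an unknown dialect name raises immediately:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five errors and raise them together once parsing finishes,
# instead of raising on the first error encountered.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")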
1259 def parse( 1260 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1261 ) -> t.List[t.Optional[exp.Expression]]: 1262 """ 1263 Parses a list of tokens and returns a list of syntax trees, one tree 1264 per parsed SQL statement. 1265 1266 Args: 1267 raw_tokens: The list of tokens. 1268 sql: The original SQL string, used to produce helpful debug messages. 1269 1270 Returns: 1271 The list of the produced syntax trees. 1272 """ 1273 return self._parse( 1274 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1275 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
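A short end-to-end sketch pairing the Tokenizer with parse; one syntax tree is produced per statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
trees = Parser().parse(Tokenizer().tokenize(sql), sql)
print(len(trees))      # 2, one tree per statement
print(trees[0].sql())  # SELECT a FROM t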
1277 def parse_into( 1278 self, 1279 expression_types: exp.IntoType, 1280 raw_tokens: t.List[Token], 1281 sql: t.Optional[str] = None, 1282 ) -> t.List[t.Optional[exp.Expression]]: 1283 """ 1284 Parses a list of tokens into a given Expression type. If a collection of Expression 1285 types is given instead, this method will try to parse the token list into each one 1286 of them, stopping at the first for which the parsing succeeds. 1287 1288 Args: 1289 expression_types: The expression type(s) to try and parse the token list into. 1290 raw_tokens: The list of tokens. 1291 sql: The original SQL string, used to produce helpful debug messages. 1292 1293 Returns: 1294 The target Expression. 1295 """ 1296 errors = [] 1297 for expression_type in ensure_list(expression_types): 1298 parser = self.EXPRESSION_PARSERS.get(expression_type) 1299 if not parser: 1300 raise TypeError(f"No parser registered for {expression_type}") 1301 1302 try: 1303 return self._parse(parser, raw_tokens, sql) 1304 except ParseError as e: 1305 e.errors[0]["into_expression"] = expression_type 1306 errors.append(e) 1307 1308 raise ParseError( 1309 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1310 errors=merge_errors(errors), 1311 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
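For instance, a bare boolean filter can be parsed straight into an exp.Condition, assuming (as in current sqlglot) that exp.Condition is registered in EXPRESSION_PARSERS:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x = 1 AND y > 2"
condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)[0]
print(condition.sql())  # x = 1 AND y > 2

This is the same mechanism the sqlglot.condition helper relies on to build filter expressions from strings.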
1351 def check_errors(self) -> None: 1352 """Logs or raises any found errors, depending on the chosen error level setting.""" 1353 if self.error_level == ErrorLevel.WARN: 1354 for error in self.errors: 1355 logger.error(str(error)) 1356 elif self.error_level == ErrorLevel.RAISE and self.errors: 1357 raise ParseError( 1358 concat_messages(self.errors, self.max_errors), 1359 errors=merge_errors(self.errors), 1360 )
Logs or raises any found errors, depending on the chosen error level setting.
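A sketch of the WARN setting: errors are logged through the "sqlglot" logger and kept on self.errors instead of being raised:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"  # incomplete binary expression
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # logs the error, returns a best-effort tree
print(len(parser.errors))  # at least 1 recorded ParseError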
1362 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1363 """ 1364 Appends an error in the list of recorded errors or raises it, depending on the chosen 1365 error level setting. 1366 """ 1367 token = token or self._curr or self._prev or Token.string("") 1368 start = token.start 1369 end = token.end + 1 1370 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1371 highlight = self.sql[start:end] 1372 end_context = self.sql[end : end + self.error_message_context] 1373 1374 error = ParseError.new( 1375 f"{message}. Line {token.line}, Col: {token.col}.\n" 1376 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1377 description=message, 1378 line=token.line, 1379 col=token.col, 1380 start_context=start_context, 1381 highlight=highlight, 1382 end_context=end_context, 1383 ) 1384 1385 if self.error_level == ErrorLevel.IMMEDIATE: 1386 raise error 1387 1388 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
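The recorded fields also travel with the raised ParseError, so callers can pinpoint the failure; a sketch with the default IMMEDIATE level:

from sqlglot import parse_one
from sqlglot.errors import ParseError

try:
    parse_one("SELECT 1 +")
except ParseError as e:
    info = e.errors[0]
    print(info["line"], info["col"], info["highlight"])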
1390 def expression( 1391 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1392 ) -> E: 1393 """ 1394 Creates a new, validated Expression. 1395 1396 Args: 1397 exp_class: The expression class to instantiate. 1398 comments: An optional list of comments to attach to the expression. 1399 kwargs: The arguments to set for the expression along with their respective values. 1400 1401 Returns: 1402 The target expression. 1403 """ 1404 instance = exp_class(**kwargs) 1405 instance.add_comments(comments) if comments else self._add_comments(instance) 1406 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
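This helper is the main building block for dialect subclasses that add parsing logic; a hypothetical custom method (the name _parse_negated is invented for illustration) could use it like so:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_negated(self):
        # expression() attaches any pending comments and validates that the
        # mandatory arguments of exp.Not are set before returning the node.
        return self.expression(exp.Not, this=self._parse_equality())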
1413 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1414 """ 1415 Validates an Expression, making sure that all its mandatory arguments are set. 1416 1417 Args: 1418 expression: The expression to validate. 1419 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1420 1421 Returns: 1422 The validated expression. 1423 """ 1424 if self.error_level != ErrorLevel.IGNORE: 1425 for error_message in expression.error_messages(args): 1426 self.raise_error(error_message) 1427 1428 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
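A sketch contrasting the error levels: with ErrorLevel.IGNORE an invalid node passes through untouched, while RAISE records the missing mandatory argument:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

node = exp.In()  # exp.In declares `this` as a mandatory argument

# IGNORE skips validation entirely and returns the node as-is
Parser(error_level=ErrorLevel.IGNORE).validate_expression(node)

# RAISE records the problem; it surfaces when check_errors() runs
parser = Parser(error_level=ErrorLevel.RAISE)
parser.validate_expression(node)
print(parser.errors[0])  # Required keyword: 'this' missing for <class 'sqlglot.expressions.In'>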