sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
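
# Example (illustrative, not part of the original source): the builders above
# normalize function arguments into canonical expression nodes. A minimal
# doctest-style sketch, assuming a standard sqlglot installation, of
# build_mod's paren-wrapping of binary operands:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     'SELECT (a + 1) % 7'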


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
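
    # Example (illustrative, not part of the original source): a minimal sketch
    # of driving the Parser by hand, assuming a standard sqlglot installation.
    # Passing dialect=None picks the default dialect, and error_level controls
    # whether raise_error raises immediately, collects, or only warns:
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> from sqlglot.errors import ErrorLevel
    #     >>> parser = Parser(error_level=ErrorLevel.RAISE)
    #     >>> sql = "SELECT 1"
    #     >>> ast = parser.parse(Tokenizer().tokenize(sql), sql)[0]
    #     >>> ast.sql()
    #     'SELECT 1'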

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
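
    # Example (illustrative, not part of the original source): dialects customize
    # parsing by subclassing Parser and overriding these class-level tables. A
    # minimal hypothetical sketch; MyDialectParser and "REGEXP_ANY" are invented
    # names, not a real dialect:
    #
    #     class MyDialectParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "REGEXP_ANY": lambda args: exp.RegexpLike(
    #                 this=seq_get(args, 0), expression=seq_get(args, 1)
    #             ),
    #         }
    #         TABLE_ALIAS_TOKENS = Parser.TABLE_ALIAS_TOKENS - {TokenType.FORMAT}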

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }
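
    # Example (illustrative, not part of the original source): STRING_PARSERS and
    # NUMERIC_PARSERS map literal token types to builders, so a quoted token
    # becomes a string Literal node. A minimal doctest-style sketch, assuming a
    # standard sqlglot installation:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 'abc'").selects[0].is_string
    #     True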

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 873 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 874 "DEFINER": lambda self: self._parse_definer(), 875 "DETERMINISTIC": lambda self: self.expression( 876 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 877 ), 878 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 879 "DISTKEY": lambda self: self._parse_distkey(), 880 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 881 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 882 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 883 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 884 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 885 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 886 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 887 "FREESPACE": lambda self: self._parse_freespace(), 888 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 889 "HEAP": lambda self: self.expression(exp.HeapProperty), 890 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 891 "IMMUTABLE": lambda self: self.expression( 892 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 893 ), 894 "INHERITS": lambda self: self.expression( 895 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 896 ), 897 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 898 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 899 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 900 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 901 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 902 "LIKE": lambda self: self._parse_create_like(), 903 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 904 "LOCK": lambda self: self._parse_locking(), 905 "LOCKING": lambda self: self._parse_locking(), 906 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 907 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 908 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 909 "MODIFIES": lambda self: self._parse_modifies_property(), 910 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 911 "NO": lambda self: self._parse_no_property(), 912 "ON": lambda self: self._parse_on_property(), 913 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 914 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 915 "PARTITION": lambda self: self._parse_partitioned_of(), 916 "PARTITION BY": lambda self: self._parse_partitioned_by(), 917 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 918 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 919 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 920 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 921 "READS": lambda self: self._parse_reads_property(), 922 "REMOTE": lambda self: self._parse_remote_with_connection(), 923 "RETURNS": lambda self: self._parse_returns(), 924 "STRICT": lambda self: self.expression(exp.StrictProperty), 925 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 926 "ROW": lambda self: self._parse_row(), 927 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 928 
"SAMPLE": lambda self: self.expression( 929 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 930 ), 931 "SECURE": lambda self: self.expression(exp.SecureProperty), 932 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 933 "SETTINGS": lambda self: self._parse_settings_property(), 934 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 935 "SORTKEY": lambda self: self._parse_sortkey(), 936 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 937 "STABLE": lambda self: self.expression( 938 exp.StabilityProperty, this=exp.Literal.string("STABLE") 939 ), 940 "STORED": lambda self: self._parse_stored(), 941 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 942 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 943 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 944 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 945 "TO": lambda self: self._parse_to_table(), 946 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 947 "TRANSFORM": lambda self: self.expression( 948 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 949 ), 950 "TTL": lambda self: self._parse_ttl(), 951 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 952 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 953 "VOLATILE": lambda self: self._parse_volatile_property(), 954 "WITH": lambda self: self._parse_with_property(), 955 } 956 957 CONSTRAINT_PARSERS = { 958 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 959 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 960 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 961 "CHARACTER SET": lambda self: self.expression( 962 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 963 ), 964 "CHECK": lambda self: self.expression( 965 exp.CheckColumnConstraint, 966 this=self._parse_wrapped(self._parse_assignment), 967 enforced=self._match_text_seq("ENFORCED"), 968 ), 969 "COLLATE": lambda self: self.expression( 970 exp.CollateColumnConstraint, 971 this=self._parse_identifier() or self._parse_column(), 972 ), 973 "COMMENT": lambda self: self.expression( 974 exp.CommentColumnConstraint, this=self._parse_string() 975 ), 976 "COMPRESS": lambda self: self._parse_compress(), 977 "CLUSTERED": lambda self: self.expression( 978 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 979 ), 980 "NONCLUSTERED": lambda self: self.expression( 981 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 982 ), 983 "DEFAULT": lambda self: self.expression( 984 exp.DefaultColumnConstraint, this=self._parse_bitwise() 985 ), 986 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 987 "EPHEMERAL": lambda self: self.expression( 988 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 989 ), 990 "EXCLUDE": lambda self: self.expression( 991 exp.ExcludeColumnConstraint, this=self._parse_index_params() 992 ), 993 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 994 "FORMAT": lambda self: self.expression( 995 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 996 ), 997 "GENERATED": lambda self: self._parse_generated_as_identity(), 998 "IDENTITY": lambda self: self._parse_auto_increment(), 999 "INLINE": lambda self: self._parse_inline(), 1000 "LIKE": lambda self: 
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
1167 "NOCACHE", 1168 "CYCLE", 1169 "NOCYCLE", 1170 "NOMINVALUE", 1171 "NOMAXVALUE", 1172 "NOSCALE", 1173 "NOSHARD", 1174 ), 1175 tuple(), 1176 ), 1177 } 1178 1179 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1180 1181 USABLES: OPTIONS_TYPE = dict.fromkeys( 1182 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1183 ) 1184 1185 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1186 1187 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1188 "TYPE": ("EVOLUTION",), 1189 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1190 } 1191 1192 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1193 "NOT": ("ENFORCED",), 1194 "MATCH": ( 1195 "FULL", 1196 "PARTIAL", 1197 "SIMPLE", 1198 ), 1199 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1200 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1201 } 1202 1203 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1204 1205 CLONE_KEYWORDS = {"CLONE", "COPY"} 1206 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1207 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1208 1209 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1210 1211 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1212 1213 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1214 1215 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1216 1217 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1218 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1219 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1220 1221 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1222 1223 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1224 1225 ADD_CONSTRAINT_TOKENS = { 1226 TokenType.CONSTRAINT, 1227 TokenType.FOREIGN_KEY, 1228 TokenType.INDEX, 1229 TokenType.KEY, 1230 TokenType.PRIMARY_KEY, 1231 TokenType.UNIQUE, 1232 } 1233 1234 DISTINCT_TOKENS = {TokenType.DISTINCT} 1235 1236 NULL_TOKENS = {TokenType.NULL} 1237 1238 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1239 1240 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1241 1242 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1243 1244 STRICT_CAST = True 1245 1246 PREFIXED_PIVOT_COLUMNS = False 1247 IDENTIFY_PIVOT_STRINGS = False 1248 1249 LOG_DEFAULTS_TO_LN = False 1250 1251 # Whether ADD is present for each column added by ALTER TABLE 1252 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1253 1254 # Whether the table sample clause expects CSV syntax 1255 TABLESAMPLE_CSV = False 1256 1257 # The default method used for table sampling 1258 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1259 1260 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1261 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1262 1263 # Whether the TRIM function expects the characters to trim as its first argument 1264 TRIM_PATTERN_FIRST = False 1265 1266 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1267 STRING_ALIASES = False 1268 1269 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1270 MODIFIERS_ATTACHED_TO_SET_OP = True 1271 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1272 1273 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1274 NO_PAREN_IF_COMMANDS = True 1275 1276 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
1375 """ 1376 errors = [] 1377 for expression_type in ensure_list(expression_types): 1378 parser = self.EXPRESSION_PARSERS.get(expression_type) 1379 if not parser: 1380 raise TypeError(f"No parser registered for {expression_type}") 1381 1382 try: 1383 return self._parse(parser, raw_tokens, sql) 1384 except ParseError as e: 1385 e.errors[0]["into_expression"] = expression_type 1386 errors.append(e) 1387 1388 raise ParseError( 1389 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1390 errors=merge_errors(errors), 1391 ) from errors[-1] 1392 1393 def _parse( 1394 self, 1395 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1396 raw_tokens: t.List[Token], 1397 sql: t.Optional[str] = None, 1398 ) -> t.List[t.Optional[exp.Expression]]: 1399 self.reset() 1400 self.sql = sql or "" 1401 1402 total = len(raw_tokens) 1403 chunks: t.List[t.List[Token]] = [[]] 1404 1405 for i, token in enumerate(raw_tokens): 1406 if token.token_type == TokenType.SEMICOLON: 1407 if token.comments: 1408 chunks.append([token]) 1409 1410 if i < total - 1: 1411 chunks.append([]) 1412 else: 1413 chunks[-1].append(token) 1414 1415 expressions = [] 1416 1417 for tokens in chunks: 1418 self._index = -1 1419 self._tokens = tokens 1420 self._advance() 1421 1422 expressions.append(parse_method(self)) 1423 1424 if self._index < len(self._tokens): 1425 self.raise_error("Invalid expression / Unexpected token") 1426 1427 self.check_errors() 1428 1429 return expressions 1430 1431 def check_errors(self) -> None: 1432 """Logs or raises any found errors, depending on the chosen error level setting.""" 1433 if self.error_level == ErrorLevel.WARN: 1434 for error in self.errors: 1435 logger.error(str(error)) 1436 elif self.error_level == ErrorLevel.RAISE and self.errors: 1437 raise ParseError( 1438 concat_messages(self.errors, self.max_errors), 1439 errors=merge_errors(self.errors), 1440 ) 1441 1442 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1443 """ 1444 Appends an error in the list of recorded errors or raises it, depending on the chosen 1445 error level setting. 1446 """ 1447 token = token or self._curr or self._prev or Token.string("") 1448 start = token.start 1449 end = token.end + 1 1450 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1451 highlight = self.sql[start:end] 1452 end_context = self.sql[end : end + self.error_message_context] 1453 1454 error = ParseError.new( 1455 f"{message}. Line {token.line}, Col: {token.col}.\n" 1456 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1457 description=message, 1458 line=token.line, 1459 col=token.col, 1460 start_context=start_context, 1461 highlight=highlight, 1462 end_context=end_context, 1463 ) 1464 1465 if self.error_level == ErrorLevel.IMMEDIATE: 1466 raise error 1467 1468 self.errors.append(error) 1469 1470 def expression( 1471 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1472 ) -> E: 1473 """ 1474 Creates a new, validated Expression. 1475 1476 Args: 1477 exp_class: The expression class to instantiate. 1478 comments: An optional list of comments to attach to the expression. 1479 kwargs: The arguments to set for the expression along with their respective values. 1480 1481 Returns: 1482 The target expression. 
1483 """ 1484 instance = exp_class(**kwargs) 1485 instance.add_comments(comments) if comments else self._add_comments(instance) 1486 return self.validate_expression(instance) 1487 1488 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1489 if expression and self._prev_comments: 1490 expression.add_comments(self._prev_comments) 1491 self._prev_comments = None 1492 1493 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1494 """ 1495 Validates an Expression, making sure that all its mandatory arguments are set. 1496 1497 Args: 1498 expression: The expression to validate. 1499 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1500 1501 Returns: 1502 The validated expression. 1503 """ 1504 if self.error_level != ErrorLevel.IGNORE: 1505 for error_message in expression.error_messages(args): 1506 self.raise_error(error_message) 1507 1508 return expression 1509 1510 def _find_sql(self, start: Token, end: Token) -> str: 1511 return self.sql[start.start : end.end + 1] 1512 1513 def _is_connected(self) -> bool: 1514 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1515 1516 def _advance(self, times: int = 1) -> None: 1517 self._index += times 1518 self._curr = seq_get(self._tokens, self._index) 1519 self._next = seq_get(self._tokens, self._index + 1) 1520 1521 if self._index > 0: 1522 self._prev = self._tokens[self._index - 1] 1523 self._prev_comments = self._prev.comments 1524 else: 1525 self._prev = None 1526 self._prev_comments = None 1527 1528 def _retreat(self, index: int) -> None: 1529 if index != self._index: 1530 self._advance(index - self._index) 1531 1532 def _warn_unsupported(self) -> None: 1533 if len(self._tokens) <= 1: 1534 return 1535 1536 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1537 # interested in emitting a warning for the one being currently processed. 1538 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1539 1540 logger.warning( 1541 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1542 ) 1543 1544 def _parse_command(self) -> exp.Command: 1545 self._warn_unsupported() 1546 return self.expression( 1547 exp.Command, 1548 comments=self._prev_comments, 1549 this=self._prev.text.upper(), 1550 expression=self._parse_string(), 1551 ) 1552 1553 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1554 """ 1555 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
self._prev 1665 temporary = self._match(TokenType.TEMPORARY) 1666 materialized = self._match_text_seq("MATERIALIZED") 1667 1668 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1669 if not kind: 1670 return self._parse_as_command(start) 1671 1672 if_exists = exists or self._parse_exists() 1673 table = self._parse_table_parts( 1674 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1675 ) 1676 1677 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1678 1679 if self._match(TokenType.L_PAREN, advance=False): 1680 expressions = self._parse_wrapped_csv(self._parse_types) 1681 else: 1682 expressions = None 1683 1684 return self.expression( 1685 exp.Drop, 1686 comments=start.comments, 1687 exists=if_exists, 1688 this=table, 1689 expressions=expressions, 1690 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1691 temporary=temporary, 1692 materialized=materialized, 1693 cascade=self._match_text_seq("CASCADE"), 1694 constraints=self._match_text_seq("CONSTRAINTS"), 1695 purge=self._match_text_seq("PURGE"), 1696 cluster=cluster, 1697 ) 1698 1699 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1700 return ( 1701 self._match_text_seq("IF") 1702 and (not not_ or self._match(TokenType.NOT)) 1703 and self._match(TokenType.EXISTS) 1704 ) 1705 1706 def _parse_create(self) -> exp.Create | exp.Command: 1707 # Note: this can't be None because we've matched a statement parser 1708 start = self._prev 1709 comments = self._prev_comments 1710 1711 replace = ( 1712 start.token_type == TokenType.REPLACE 1713 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1714 or self._match_pair(TokenType.OR, TokenType.ALTER) 1715 ) 1716 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1717 1718 unique = self._match(TokenType.UNIQUE) 1719 1720 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1721 clustered = True 1722 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1723 "COLUMNSTORE" 1724 ): 1725 clustered = False 1726 else: 1727 clustered = None 1728 1729 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1730 self._advance() 1731 1732 properties = None 1733 create_token = self._match_set(self.CREATABLES) and self._prev 1734 1735 if not create_token: 1736 # exp.Properties.Location.POST_CREATE 1737 properties = self._parse_properties() 1738 create_token = self._match_set(self.CREATABLES) and self._prev 1739 1740 if not properties or not create_token: 1741 return self._parse_as_command(start) 1742 1743 concurrently = self._match_text_seq("CONCURRENTLY") 1744 exists = self._parse_exists(not_=True) 1745 this = None 1746 expression: t.Optional[exp.Expression] = None 1747 indexes = None 1748 no_schema_binding = None 1749 begin = None 1750 end = None 1751 clone = None 1752 1753 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1754 nonlocal properties 1755 if properties and temp_props: 1756 properties.expressions.extend(temp_props.expressions) 1757 elif temp_props: 1758 properties = temp_props 1759 1760 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1761 this = self._parse_user_defined_function(kind=create_token.token_type) 1762 1763 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1764 extend_props(self._parse_properties()) 1765 1766 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1767 extend_props(self._parse_properties()) 1768 1769 if not expression: 1770 if 
self._match(TokenType.COMMAND): 1771 expression = self._parse_as_command(self._prev) 1772 else: 1773 begin = self._match(TokenType.BEGIN) 1774 return_ = self._match_text_seq("RETURN") 1775 1776 if self._match(TokenType.STRING, advance=False): 1777 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1778 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1779 expression = self._parse_string() 1780 extend_props(self._parse_properties()) 1781 else: 1782 expression = self._parse_statement() 1783 1784 end = self._match_text_seq("END") 1785 1786 if return_: 1787 expression = self.expression(exp.Return, this=expression) 1788 elif create_token.token_type == TokenType.INDEX: 1789 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1790 if not self._match(TokenType.ON): 1791 index = self._parse_id_var() 1792 anonymous = False 1793 else: 1794 index = None 1795 anonymous = True 1796 1797 this = self._parse_index(index=index, anonymous=anonymous) 1798 elif create_token.token_type in self.DB_CREATABLES: 1799 table_parts = self._parse_table_parts( 1800 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1801 ) 1802 1803 # exp.Properties.Location.POST_NAME 1804 self._match(TokenType.COMMA) 1805 extend_props(self._parse_properties(before=True)) 1806 1807 this = self._parse_schema(this=table_parts) 1808 1809 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1810 extend_props(self._parse_properties()) 1811 1812 self._match(TokenType.ALIAS) 1813 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1814 # exp.Properties.Location.POST_ALIAS 1815 extend_props(self._parse_properties()) 1816 1817 if create_token.token_type == TokenType.SEQUENCE: 1818 expression = self._parse_types() 1819 extend_props(self._parse_properties()) 1820 else: 1821 expression = self._parse_ddl_select() 1822 1823 if create_token.token_type == TokenType.TABLE: 1824 # exp.Properties.Location.POST_EXPRESSION 1825 extend_props(self._parse_properties()) 1826 1827 indexes = [] 1828 while True: 1829 index = self._parse_index() 1830 1831 # exp.Properties.Location.POST_INDEX 1832 extend_props(self._parse_properties()) 1833 if not index: 1834 break 1835 else: 1836 self._match(TokenType.COMMA) 1837 indexes.append(index) 1838 elif create_token.token_type == TokenType.VIEW: 1839 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1840 no_schema_binding = True 1841 1842 shallow = self._match_text_seq("SHALLOW") 1843 1844 if self._match_texts(self.CLONE_KEYWORDS): 1845 copy = self._prev.text.lower() == "copy" 1846 clone = self.expression( 1847 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1848 ) 1849 1850 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1851 return self._parse_as_command(start) 1852 1853 create_kind_text = create_token.text.upper() 1854 return self.expression( 1855 exp.Create, 1856 comments=comments, 1857 this=this, 1858 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1859 replace=replace, 1860 refresh=refresh, 1861 unique=unique, 1862 expression=expression, 1863 exists=exists, 1864 properties=properties, 1865 indexes=indexes, 1866 no_schema_binding=no_schema_binding, 1867 begin=begin, 1868 end=end, 1869 clone=clone, 1870 concurrently=concurrently, 1871 clustered=clustered, 1872 ) 1873 1874 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
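# A sketch of the input this loop consumes (hypothetical SQL, for illustration):
#
#     CREATE SEQUENCE s INCREMENT BY 2 MINVALUE 0 MAXVALUE 100 START WITH 10 CACHE 5
#
# Options may appear in any order, optionally comma-separated; each branch
# below fills the matching arg on exp.SequenceProperties, and None is
# returned if no option was consumed at all.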
1875 seq = exp.SequenceProperties() 1876 1877 options = [] 1878 index = self._index 1879 1880 while self._curr: 1881 self._match(TokenType.COMMA) 1882 if self._match_text_seq("INCREMENT"): 1883 self._match_text_seq("BY") 1884 self._match_text_seq("=") 1885 seq.set("increment", self._parse_term()) 1886 elif self._match_text_seq("MINVALUE"): 1887 seq.set("minvalue", self._parse_term()) 1888 elif self._match_text_seq("MAXVALUE"): 1889 seq.set("maxvalue", self._parse_term()) 1890 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1891 self._match_text_seq("=") 1892 seq.set("start", self._parse_term()) 1893 elif self._match_text_seq("CACHE"): 1894 # T-SQL allows empty CACHE which is initialized dynamically 1895 seq.set("cache", self._parse_number() or True) 1896 elif self._match_text_seq("OWNED", "BY"): 1897 # "OWNED BY NONE" is the default 1898 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1899 else: 1900 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1901 if opt: 1902 options.append(opt) 1903 else: 1904 break 1905 1906 seq.set("options", options if options else None) 1907 return None if self._index == index else seq 1908 1909 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1910 # only used for teradata currently 1911 self._match(TokenType.COMMA) 1912 1913 kwargs = { 1914 "no": self._match_text_seq("NO"), 1915 "dual": self._match_text_seq("DUAL"), 1916 "before": self._match_text_seq("BEFORE"), 1917 "default": self._match_text_seq("DEFAULT"), 1918 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1919 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1920 "after": self._match_text_seq("AFTER"), 1921 "minimum": self._match_texts(("MIN", "MINIMUM")), 1922 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1923 } 1924 1925 if self._match_texts(self.PROPERTY_PARSERS): 1926 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1927 try: 1928 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1929 except TypeError: 1930 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1931 1932 return None 1933 1934 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1935 return self._parse_wrapped_csv(self._parse_property) 1936 1937 def _parse_property(self) -> t.Optional[exp.Expression]: 1938 if self._match_texts(self.PROPERTY_PARSERS): 1939 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1940 1941 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1942 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1943 1944 if self._match_text_seq("COMPOUND", "SORTKEY"): 1945 return self._parse_sortkey(compound=True) 1946 1947 if self._match_text_seq("SQL", "SECURITY"): 1948 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1949 1950 index = self._index 1951 key = self._parse_column() 1952 1953 if not self._match(TokenType.EQ): 1954 self._retreat(index) 1955 return self._parse_sequence_properties() 1956 1957 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1958 if isinstance(key, exp.Column): 1959 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1960 1961 value = self._parse_bitwise() or self._parse_var(any_token=True) 1962 1963 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1964 if isinstance(value, exp.Column): 1965 value = exp.var(value.name) 1966 1967 return 
self.expression(exp.Property, this=key, value=value) 1968 1969 def _parse_stored(self) -> exp.FileFormatProperty: 1970 self._match(TokenType.ALIAS) 1971 1972 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1973 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1974 1975 return self.expression( 1976 exp.FileFormatProperty, 1977 this=( 1978 self.expression( 1979 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1980 ) 1981 if input_format or output_format 1982 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1983 ), 1984 ) 1985 1986 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1987 field = self._parse_field() 1988 if isinstance(field, exp.Identifier) and not field.quoted: 1989 field = exp.var(field) 1990 1991 return field 1992 1993 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1994 self._match(TokenType.EQ) 1995 self._match(TokenType.ALIAS) 1996 1997 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1998 1999 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2000 properties = [] 2001 while True: 2002 if before: 2003 prop = self._parse_property_before() 2004 else: 2005 prop = self._parse_property() 2006 if not prop: 2007 break 2008 for p in ensure_list(prop): 2009 properties.append(p) 2010 2011 if properties: 2012 return self.expression(exp.Properties, expressions=properties) 2013 2014 return None 2015 2016 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2017 return self.expression( 2018 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2019 ) 2020 2021 def _parse_settings_property(self) -> exp.SettingsProperty: 2022 return self.expression( 2023 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2024 ) 2025 2026 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2027 if self._index >= 2: 2028 pre_volatile_token = self._tokens[self._index - 2] 2029 else: 2030 pre_volatile_token = None 2031 2032 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2033 return exp.VolatileProperty() 2034 2035 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2036 2037 def _parse_retention_period(self) -> exp.Var: 2038 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2039 number = self._parse_number() 2040 number_str = f"{number} " if number else "" 2041 unit = self._parse_var(any_token=True) 2042 return exp.var(f"{number_str}{unit}") 2043 2044 def _parse_system_versioning_property( 2045 self, with_: bool = False 2046 ) -> exp.WithSystemVersioningProperty: 2047 self._match(TokenType.EQ) 2048 prop = self.expression( 2049 exp.WithSystemVersioningProperty, 2050 **{ # type: ignore 2051 "on": True, 2052 "with": with_, 2053 }, 2054 ) 2055 2056 if self._match_text_seq("OFF"): 2057 prop.set("on", False) 2058 return prop 2059 2060 self._match(TokenType.ON) 2061 if self._match(TokenType.L_PAREN): 2062 while self._curr and not self._match(TokenType.R_PAREN): 2063 if self._match_text_seq("HISTORY_TABLE", "="): 2064 prop.set("this", self._parse_table_parts()) 2065 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2066 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2067 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2068 
prop.set("retention_period", self._parse_retention_period()) 2069 2070 self._match(TokenType.COMMA) 2071 2072 return prop 2073 2074 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2075 self._match(TokenType.EQ) 2076 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2077 prop = self.expression(exp.DataDeletionProperty, on=on) 2078 2079 if self._match(TokenType.L_PAREN): 2080 while self._curr and not self._match(TokenType.R_PAREN): 2081 if self._match_text_seq("FILTER_COLUMN", "="): 2082 prop.set("filter_column", self._parse_column()) 2083 elif self._match_text_seq("RETENTION_PERIOD", "="): 2084 prop.set("retention_period", self._parse_retention_period()) 2085 2086 self._match(TokenType.COMMA) 2087 2088 return prop 2089 2090 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2091 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2092 prop = self._parse_system_versioning_property(with_=True) 2093 self._match_r_paren() 2094 return prop 2095 2096 if self._match(TokenType.L_PAREN, advance=False): 2097 return self._parse_wrapped_properties() 2098 2099 if self._match_text_seq("JOURNAL"): 2100 return self._parse_withjournaltable() 2101 2102 if self._match_texts(self.VIEW_ATTRIBUTES): 2103 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2104 2105 if self._match_text_seq("DATA"): 2106 return self._parse_withdata(no=False) 2107 elif self._match_text_seq("NO", "DATA"): 2108 return self._parse_withdata(no=True) 2109 2110 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2111 return self._parse_serde_properties(with_=True) 2112 2113 if self._match(TokenType.SCHEMA): 2114 return self.expression( 2115 exp.WithSchemaBindingProperty, 2116 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2117 ) 2118 2119 if not self._next: 2120 return None 2121 2122 return self._parse_withisolatedloading() 2123 2124 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2125 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2126 self._match(TokenType.EQ) 2127 2128 user = self._parse_id_var() 2129 self._match(TokenType.PARAMETER) 2130 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2131 2132 if not user or not host: 2133 return None 2134 2135 return exp.DefinerProperty(this=f"{user}@{host}") 2136 2137 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2138 self._match(TokenType.TABLE) 2139 self._match(TokenType.EQ) 2140 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2141 2142 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2143 return self.expression(exp.LogProperty, no=no) 2144 2145 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2146 return self.expression(exp.JournalProperty, **kwargs) 2147 2148 def _parse_checksum(self) -> exp.ChecksumProperty: 2149 self._match(TokenType.EQ) 2150 2151 on = None 2152 if self._match(TokenType.ON): 2153 on = True 2154 elif self._match_text_seq("OFF"): 2155 on = False 2156 2157 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2158 2159 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2160 return self.expression( 2161 exp.Cluster, 2162 expressions=( 2163 self._parse_wrapped_csv(self._parse_ordered) 2164 if wrapped 2165 else self._parse_csv(self._parse_ordered) 2166 ), 2167 ) 2168 2169 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2170 self._match_text_seq("BY") 2171 2172 
self._match_l_paren() 2173 expressions = self._parse_csv(self._parse_column) 2174 self._match_r_paren() 2175 2176 if self._match_text_seq("SORTED", "BY"): 2177 self._match_l_paren() 2178 sorted_by = self._parse_csv(self._parse_ordered) 2179 self._match_r_paren() 2180 else: 2181 sorted_by = None 2182 2183 self._match(TokenType.INTO) 2184 buckets = self._parse_number() 2185 self._match_text_seq("BUCKETS") 2186 2187 return self.expression( 2188 exp.ClusteredByProperty, 2189 expressions=expressions, 2190 sorted_by=sorted_by, 2191 buckets=buckets, 2192 ) 2193 2194 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2195 if not self._match_text_seq("GRANTS"): 2196 self._retreat(self._index - 1) 2197 return None 2198 2199 return self.expression(exp.CopyGrantsProperty) 2200 2201 def _parse_freespace(self) -> exp.FreespaceProperty: 2202 self._match(TokenType.EQ) 2203 return self.expression( 2204 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2205 ) 2206 2207 def _parse_mergeblockratio( 2208 self, no: bool = False, default: bool = False 2209 ) -> exp.MergeBlockRatioProperty: 2210 if self._match(TokenType.EQ): 2211 return self.expression( 2212 exp.MergeBlockRatioProperty, 2213 this=self._parse_number(), 2214 percent=self._match(TokenType.PERCENT), 2215 ) 2216 2217 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2218 2219 def _parse_datablocksize( 2220 self, 2221 default: t.Optional[bool] = None, 2222 minimum: t.Optional[bool] = None, 2223 maximum: t.Optional[bool] = None, 2224 ) -> exp.DataBlocksizeProperty: 2225 self._match(TokenType.EQ) 2226 size = self._parse_number() 2227 2228 units = None 2229 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2230 units = self._prev.text 2231 2232 return self.expression( 2233 exp.DataBlocksizeProperty, 2234 size=size, 2235 units=units, 2236 default=default, 2237 minimum=minimum, 2238 maximum=maximum, 2239 ) 2240 2241 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2242 self._match(TokenType.EQ) 2243 always = self._match_text_seq("ALWAYS") 2244 manual = self._match_text_seq("MANUAL") 2245 never = self._match_text_seq("NEVER") 2246 default = self._match_text_seq("DEFAULT") 2247 2248 autotemp = None 2249 if self._match_text_seq("AUTOTEMP"): 2250 autotemp = self._parse_schema() 2251 2252 return self.expression( 2253 exp.BlockCompressionProperty, 2254 always=always, 2255 manual=manual, 2256 never=never, 2257 default=default, 2258 autotemp=autotemp, 2259 ) 2260 2261 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2262 index = self._index 2263 no = self._match_text_seq("NO") 2264 concurrent = self._match_text_seq("CONCURRENT") 2265 2266 if not self._match_text_seq("ISOLATED", "LOADING"): 2267 self._retreat(index) 2268 return None 2269 2270 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2271 return self.expression( 2272 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2273 ) 2274 2275 def _parse_locking(self) -> exp.LockingProperty: 2276 if self._match(TokenType.TABLE): 2277 kind = "TABLE" 2278 elif self._match(TokenType.VIEW): 2279 kind = "VIEW" 2280 elif self._match(TokenType.ROW): 2281 kind = "ROW" 2282 elif self._match_text_seq("DATABASE"): 2283 kind = "DATABASE" 2284 else: 2285 kind = None 2286 2287 if kind in ("DATABASE", "TABLE", "VIEW"): 2288 this = self._parse_table_parts() 2289 else: 2290 this = None 2291 2292 if self._match(TokenType.FOR): 2293 
for_or_in = "FOR" 2294 elif self._match(TokenType.IN): 2295 for_or_in = "IN" 2296 else: 2297 for_or_in = None 2298 2299 if self._match_text_seq("ACCESS"): 2300 lock_type = "ACCESS" 2301 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2302 lock_type = "EXCLUSIVE" 2303 elif self._match_text_seq("SHARE"): 2304 lock_type = "SHARE" 2305 elif self._match_text_seq("READ"): 2306 lock_type = "READ" 2307 elif self._match_text_seq("WRITE"): 2308 lock_type = "WRITE" 2309 elif self._match_text_seq("CHECKSUM"): 2310 lock_type = "CHECKSUM" 2311 else: 2312 lock_type = None 2313 2314 override = self._match_text_seq("OVERRIDE") 2315 2316 return self.expression( 2317 exp.LockingProperty, 2318 this=this, 2319 kind=kind, 2320 for_or_in=for_or_in, 2321 lock_type=lock_type, 2322 override=override, 2323 ) 2324 2325 def _parse_partition_by(self) -> t.List[exp.Expression]: 2326 if self._match(TokenType.PARTITION_BY): 2327 return self._parse_csv(self._parse_assignment) 2328 return [] 2329 2330 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2331 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2332 if self._match_text_seq("MINVALUE"): 2333 return exp.var("MINVALUE") 2334 if self._match_text_seq("MAXVALUE"): 2335 return exp.var("MAXVALUE") 2336 return self._parse_bitwise() 2337 2338 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2339 expression = None 2340 from_expressions = None 2341 to_expressions = None 2342 2343 if self._match(TokenType.IN): 2344 this = self._parse_wrapped_csv(self._parse_bitwise) 2345 elif self._match(TokenType.FROM): 2346 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2347 self._match_text_seq("TO") 2348 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2349 elif self._match_text_seq("WITH", "(", "MODULUS"): 2350 this = self._parse_number() 2351 self._match_text_seq(",", "REMAINDER") 2352 expression = self._parse_number() 2353 self._match_r_paren() 2354 else: 2355 self.raise_error("Failed to parse partition bound spec.") 2356 2357 return self.expression( 2358 exp.PartitionBoundSpec, 2359 this=this, 2360 expression=expression, 2361 from_expressions=from_expressions, 2362 to_expressions=to_expressions, 2363 ) 2364 2365 # https://www.postgresql.org/docs/current/sql-createtable.html 2366 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2367 if not self._match_text_seq("OF"): 2368 self._retreat(self._index - 1) 2369 return None 2370 2371 this = self._parse_table(schema=True) 2372 2373 if self._match(TokenType.DEFAULT): 2374 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2375 elif self._match_text_seq("FOR", "VALUES"): 2376 expression = self._parse_partition_bound_spec() 2377 else: 2378 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2379 2380 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2381 2382 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2383 self._match(TokenType.EQ) 2384 return self.expression( 2385 exp.PartitionedByProperty, 2386 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2387 ) 2388 2389 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2390 if self._match_text_seq("AND", "STATISTICS"): 2391 statistics = True 2392 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2393 statistics = False 2394 else: 2395 statistics = None 2396 2397 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2398 2399 def 
_parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2400 if self._match_text_seq("SQL"): 2401 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2402 return None 2403 2404 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2405 if self._match_text_seq("SQL", "DATA"): 2406 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2407 return None 2408 2409 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2410 if self._match_text_seq("PRIMARY", "INDEX"): 2411 return exp.NoPrimaryIndexProperty() 2412 if self._match_text_seq("SQL"): 2413 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2414 return None 2415 2416 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2417 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2418 return exp.OnCommitProperty() 2419 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2420 return exp.OnCommitProperty(delete=True) 2421 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2422 2423 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2424 if self._match_text_seq("SQL", "DATA"): 2425 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2426 return None 2427 2428 def _parse_distkey(self) -> exp.DistKeyProperty: 2429 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2430 2431 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2432 table = self._parse_table(schema=True) 2433 2434 options = [] 2435 while self._match_texts(("INCLUDING", "EXCLUDING")): 2436 this = self._prev.text.upper() 2437 2438 id_var = self._parse_id_var() 2439 if not id_var: 2440 return None 2441 2442 options.append( 2443 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2444 ) 2445 2446 return self.expression(exp.LikeProperty, this=table, expressions=options) 2447 2448 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2449 return self.expression( 2450 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2451 ) 2452 2453 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2454 self._match(TokenType.EQ) 2455 return self.expression( 2456 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2457 ) 2458 2459 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2460 self._match_text_seq("WITH", "CONNECTION") 2461 return self.expression( 2462 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2463 ) 2464 2465 def _parse_returns(self) -> exp.ReturnsProperty: 2466 value: t.Optional[exp.Expression] 2467 null = None 2468 is_table = self._match(TokenType.TABLE) 2469 2470 if is_table: 2471 if self._match(TokenType.LT): 2472 value = self.expression( 2473 exp.Schema, 2474 this="TABLE", 2475 expressions=self._parse_csv(self._parse_struct_types), 2476 ) 2477 if not self._match(TokenType.GT): 2478 self.raise_error("Expecting >") 2479 else: 2480 value = self._parse_schema(exp.var("TABLE")) 2481 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2482 null = True 2483 value = None 2484 else: 2485 value = self._parse_types() 2486 2487 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2488 2489 def _parse_describe(self) -> exp.Describe: 2490 kind = self._match_set(self.CREATABLES) and self._prev.text 2491 style = self._match_texts(("EXTENDED", "FORMATTED", 
"HISTORY")) and self._prev.text.upper() 2492 if self._match(TokenType.DOT): 2493 style = None 2494 self._retreat(self._index - 2) 2495 this = self._parse_table(schema=True) 2496 properties = self._parse_properties() 2497 expressions = properties.expressions if properties else None 2498 return self.expression( 2499 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2500 ) 2501 2502 def _parse_insert(self) -> exp.Insert: 2503 comments = ensure_list(self._prev_comments) 2504 hint = self._parse_hint() 2505 overwrite = self._match(TokenType.OVERWRITE) 2506 ignore = self._match(TokenType.IGNORE) 2507 local = self._match_text_seq("LOCAL") 2508 alternative = None 2509 is_function = None 2510 2511 if self._match_text_seq("DIRECTORY"): 2512 this: t.Optional[exp.Expression] = self.expression( 2513 exp.Directory, 2514 this=self._parse_var_or_string(), 2515 local=local, 2516 row_format=self._parse_row_format(match_row=True), 2517 ) 2518 else: 2519 if self._match(TokenType.OR): 2520 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2521 2522 self._match(TokenType.INTO) 2523 comments += ensure_list(self._prev_comments) 2524 self._match(TokenType.TABLE) 2525 is_function = self._match(TokenType.FUNCTION) 2526 2527 this = ( 2528 self._parse_table(schema=True, parse_partition=True) 2529 if not is_function 2530 else self._parse_function() 2531 ) 2532 2533 returning = self._parse_returning() 2534 2535 return self.expression( 2536 exp.Insert, 2537 comments=comments, 2538 hint=hint, 2539 is_function=is_function, 2540 this=this, 2541 stored=self._match_text_seq("STORED") and self._parse_stored(), 2542 by_name=self._match_text_seq("BY", "NAME"), 2543 exists=self._parse_exists(), 2544 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2545 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2546 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2547 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2548 conflict=self._parse_on_conflict(), 2549 returning=returning or self._parse_returning(), 2550 overwrite=overwrite, 2551 alternative=alternative, 2552 ignore=ignore, 2553 ) 2554 2555 def _parse_kill(self) -> exp.Kill: 2556 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2557 2558 return self.expression( 2559 exp.Kill, 2560 this=self._parse_primary(), 2561 kind=kind, 2562 ) 2563 2564 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2565 conflict = self._match_text_seq("ON", "CONFLICT") 2566 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2567 2568 if not conflict and not duplicate: 2569 return None 2570 2571 conflict_keys = None 2572 constraint = None 2573 2574 if conflict: 2575 if self._match_text_seq("ON", "CONSTRAINT"): 2576 constraint = self._parse_id_var() 2577 elif self._match(TokenType.L_PAREN): 2578 conflict_keys = self._parse_csv(self._parse_id_var) 2579 self._match_r_paren() 2580 2581 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2582 if self._prev.token_type == TokenType.UPDATE: 2583 self._match(TokenType.SET) 2584 expressions = self._parse_csv(self._parse_equality) 2585 else: 2586 expressions = None 2587 2588 return self.expression( 2589 exp.OnConflict, 2590 duplicate=duplicate, 2591 expressions=expressions, 2592 action=action, 2593 conflict_keys=conflict_keys, 2594 constraint=constraint, 2595 ) 2596 2597 def _parse_returning(self) -> t.Optional[exp.Returning]: 
2598 if not self._match(TokenType.RETURNING): 2599 return None 2600 return self.expression( 2601 exp.Returning, 2602 expressions=self._parse_csv(self._parse_expression), 2603 into=self._match(TokenType.INTO) and self._parse_table_part(), 2604 ) 2605 2606 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2607 if not self._match(TokenType.FORMAT): 2608 return None 2609 return self._parse_row_format() 2610 2611 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2612 index = self._index 2613 with_ = with_ or self._match_text_seq("WITH") 2614 2615 if not self._match(TokenType.SERDE_PROPERTIES): 2616 self._retreat(index) 2617 return None 2618 return self.expression( 2619 exp.SerdeProperties, 2620 **{ # type: ignore 2621 "expressions": self._parse_wrapped_properties(), 2622 "with": with_, 2623 }, 2624 ) 2625 2626 def _parse_row_format( 2627 self, match_row: bool = False 2628 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2629 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2630 return None 2631 2632 if self._match_text_seq("SERDE"): 2633 this = self._parse_string() 2634 2635 serde_properties = self._parse_serde_properties() 2636 2637 return self.expression( 2638 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2639 ) 2640 2641 self._match_text_seq("DELIMITED") 2642 2643 kwargs = {} 2644 2645 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2646 kwargs["fields"] = self._parse_string() 2647 if self._match_text_seq("ESCAPED", "BY"): 2648 kwargs["escaped"] = self._parse_string() 2649 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2650 kwargs["collection_items"] = self._parse_string() 2651 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2652 kwargs["map_keys"] = self._parse_string() 2653 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2654 kwargs["lines"] = self._parse_string() 2655 if self._match_text_seq("NULL", "DEFINED", "AS"): 2656 kwargs["null"] = self._parse_string() 2657 2658 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2659 2660 def _parse_load(self) -> exp.LoadData | exp.Command: 2661 if self._match_text_seq("DATA"): 2662 local = self._match_text_seq("LOCAL") 2663 self._match_text_seq("INPATH") 2664 inpath = self._parse_string() 2665 overwrite = self._match(TokenType.OVERWRITE) 2666 self._match_pair(TokenType.INTO, TokenType.TABLE) 2667 2668 return self.expression( 2669 exp.LoadData, 2670 this=self._parse_table(schema=True), 2671 local=local, 2672 overwrite=overwrite, 2673 inpath=inpath, 2674 partition=self._parse_partition(), 2675 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2676 serde=self._match_text_seq("SERDE") and self._parse_string(), 2677 ) 2678 return self._parse_as_command(self._prev) 2679 2680 def _parse_delete(self) -> exp.Delete: 2681 # This handles MySQL's "Multiple-Table Syntax" 2682 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2683 tables = None 2684 comments = self._prev_comments 2685 if not self._match(TokenType.FROM, advance=False): 2686 tables = self._parse_csv(self._parse_table) or None 2687 2688 returning = self._parse_returning() 2689 2690 return self.expression( 2691 exp.Delete, 2692 comments=comments, 2693 tables=tables, 2694 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2695 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2696 
where=self._parse_where(), 2697 returning=returning or self._parse_returning(), 2698 limit=self._parse_limit(), 2699 ) 2700 2701 def _parse_update(self) -> exp.Update: 2702 comments = self._prev_comments 2703 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2704 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2705 returning = self._parse_returning() 2706 return self.expression( 2707 exp.Update, 2708 comments=comments, 2709 **{ # type: ignore 2710 "this": this, 2711 "expressions": expressions, 2712 "from": self._parse_from(joins=True), 2713 "where": self._parse_where(), 2714 "returning": returning or self._parse_returning(), 2715 "order": self._parse_order(), 2716 "limit": self._parse_limit(), 2717 }, 2718 ) 2719 2720 def _parse_uncache(self) -> exp.Uncache: 2721 if not self._match(TokenType.TABLE): 2722 self.raise_error("Expecting TABLE after UNCACHE") 2723 2724 return self.expression( 2725 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2726 ) 2727 2728 def _parse_cache(self) -> exp.Cache: 2729 lazy = self._match_text_seq("LAZY") 2730 self._match(TokenType.TABLE) 2731 table = self._parse_table(schema=True) 2732 2733 options = [] 2734 if self._match_text_seq("OPTIONS"): 2735 self._match_l_paren() 2736 k = self._parse_string() 2737 self._match(TokenType.EQ) 2738 v = self._parse_string() 2739 options = [k, v] 2740 self._match_r_paren() 2741 2742 self._match(TokenType.ALIAS) 2743 return self.expression( 2744 exp.Cache, 2745 this=table, 2746 lazy=lazy, 2747 options=options, 2748 expression=self._parse_select(nested=True), 2749 ) 2750 2751 def _parse_partition(self) -> t.Optional[exp.Partition]: 2752 if not self._match(TokenType.PARTITION): 2753 return None 2754 2755 return self.expression( 2756 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2757 ) 2758 2759 def _parse_value(self) -> t.Optional[exp.Tuple]: 2760 if self._match(TokenType.L_PAREN): 2761 expressions = self._parse_csv(self._parse_expression) 2762 self._match_r_paren() 2763 return self.expression(exp.Tuple, expressions=expressions) 2764 2765 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
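# For illustration: each bare scalar is wrapped in its own single-item
# exp.Tuple below, so VALUES 1, 2 becomes two one-column rows rather than
# one two-column row.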
2766 expression = self._parse_expression() 2767 if expression: 2768 return self.expression(exp.Tuple, expressions=[expression]) 2769 return None 2770 2771 def _parse_projections(self) -> t.List[exp.Expression]: 2772 return self._parse_expressions() 2773 2774 def _parse_select( 2775 self, 2776 nested: bool = False, 2777 table: bool = False, 2778 parse_subquery_alias: bool = True, 2779 parse_set_operation: bool = True, 2780 ) -> t.Optional[exp.Expression]: 2781 cte = self._parse_with() 2782 2783 if cte: 2784 this = self._parse_statement() 2785 2786 if not this: 2787 self.raise_error("Failed to parse any statement following CTE") 2788 return cte 2789 2790 if "with" in this.arg_types: 2791 this.set("with", cte) 2792 else: 2793 self.raise_error(f"{this.key} does not support CTE") 2794 this = cte 2795 2796 return this 2797 2798 # duckdb supports leading with FROM x 2799 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2800 2801 if self._match(TokenType.SELECT): 2802 comments = self._prev_comments 2803 2804 hint = self._parse_hint() 2805 2806 if self._next and not self._next.token_type == TokenType.DOT: 2807 all_ = self._match(TokenType.ALL) 2808 distinct = self._match_set(self.DISTINCT_TOKENS) 2809 else: 2810 all_, distinct = None, None 2811 2812 kind = ( 2813 self._match(TokenType.ALIAS) 2814 and self._match_texts(("STRUCT", "VALUE")) 2815 and self._prev.text.upper() 2816 ) 2817 2818 if distinct: 2819 distinct = self.expression( 2820 exp.Distinct, 2821 on=self._parse_value() if self._match(TokenType.ON) else None, 2822 ) 2823 2824 if all_ and distinct: 2825 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2826 2827 limit = self._parse_limit(top=True) 2828 projections = self._parse_projections() 2829 2830 this = self.expression( 2831 exp.Select, 2832 kind=kind, 2833 hint=hint, 2834 distinct=distinct, 2835 expressions=projections, 2836 limit=limit, 2837 ) 2838 this.comments = comments 2839 2840 into = self._parse_into() 2841 if into: 2842 this.set("into", into) 2843 2844 if not from_: 2845 from_ = self._parse_from() 2846 2847 if from_: 2848 this.set("from", from_) 2849 2850 this = self._parse_query_modifiers(this) 2851 elif (table or nested) and self._match(TokenType.L_PAREN): 2852 if self._match(TokenType.PIVOT): 2853 this = self._parse_simplified_pivot() 2854 elif self._match(TokenType.FROM): 2855 this = exp.select("*").from_( 2856 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2857 ) 2858 else: 2859 this = ( 2860 self._parse_table() 2861 if table 2862 else self._parse_select(nested=True, parse_set_operation=False) 2863 ) 2864 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2865 2866 self._match_r_paren() 2867 2868 # We return early here so that the UNION isn't attached to the subquery by the 2869 # following call to _parse_set_operations, but instead becomes the parent node 2870 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2871 elif self._match(TokenType.VALUES, advance=False): 2872 this = self._parse_derived_table_values() 2873 elif from_: 2874 this = exp.select("*").from_(from_.this, copy=False) 2875 elif self._match(TokenType.SUMMARIZE): 2876 table = self._match(TokenType.TABLE) 2877 this = self._parse_select() or self._parse_string() or self._parse_table() 2878 return self.expression(exp.Summarize, this=this, table=table) 2879 elif self._match(TokenType.DESCRIBE): 2880 this = self._parse_describe() 2881 elif self._match_text_seq("STREAM"): 2882 this = self.expression(exp.Stream, 
this=self._parse_function()) 2883 else: 2884 this = None 2885 2886 return self._parse_set_operations(this) if parse_set_operation else this 2887 2888 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2889 if not skip_with_token and not self._match(TokenType.WITH): 2890 return None 2891 2892 comments = self._prev_comments 2893 recursive = self._match(TokenType.RECURSIVE) 2894 2895 expressions = [] 2896 while True: 2897 expressions.append(self._parse_cte()) 2898 2899 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2900 break 2901 else: 2902 self._match(TokenType.WITH) 2903 2904 return self.expression( 2905 exp.With, comments=comments, expressions=expressions, recursive=recursive 2906 ) 2907 2908 def _parse_cte(self) -> exp.CTE: 2909 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2910 if not alias or not alias.this: 2911 self.raise_error("Expected CTE to have alias") 2912 2913 self._match(TokenType.ALIAS) 2914 comments = self._prev_comments 2915 2916 if self._match_text_seq("NOT", "MATERIALIZED"): 2917 materialized = False 2918 elif self._match_text_seq("MATERIALIZED"): 2919 materialized = True 2920 else: 2921 materialized = None 2922 2923 return self.expression( 2924 exp.CTE, 2925 this=self._parse_wrapped(self._parse_statement), 2926 alias=alias, 2927 materialized=materialized, 2928 comments=comments, 2929 ) 2930 2931 def _parse_table_alias( 2932 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2933 ) -> t.Optional[exp.TableAlias]: 2934 any_token = self._match(TokenType.ALIAS) 2935 alias = ( 2936 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2937 or self._parse_string_as_identifier() 2938 ) 2939 2940 index = self._index 2941 if self._match(TokenType.L_PAREN): 2942 columns = self._parse_csv(self._parse_function_parameter) 2943 self._match_r_paren() if columns else self._retreat(index) 2944 else: 2945 columns = None 2946 2947 if not alias and not columns: 2948 return None 2949 2950 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2951 2952 # We bubble up comments from the Identifier to the TableAlias 2953 if isinstance(alias, exp.Identifier): 2954 table_alias.add_comments(alias.pop_comments()) 2955 2956 return table_alias 2957 2958 def _parse_subquery( 2959 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2960 ) -> t.Optional[exp.Subquery]: 2961 if not this: 2962 return None 2963 2964 return self.expression( 2965 exp.Subquery, 2966 this=this, 2967 pivots=self._parse_pivots(), 2968 alias=self._parse_table_alias() if parse_alias else None, 2969 ) 2970 2971 def _implicit_unnests_to_explicit(self, this: E) -> E: 2972 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2973 2974 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2975 for i, join in enumerate(this.args.get("joins") or []): 2976 table = join.this 2977 normalized_table = table.copy() 2978 normalized_table.meta["maybe_column"] = True 2979 normalized_table = _norm(normalized_table, dialect=self.dialect) 2980 2981 if isinstance(table, exp.Table) and not join.args.get("on"): 2982 if normalized_table.parts[0].name in refs: 2983 table_as_column = table.to_column() 2984 unnest = exp.Unnest(expressions=[table_as_column]) 2985 2986 # Table.to_column creates a parent Alias node that we want to convert to 2987 # a TableAlias and attach to the Unnest, so it matches the parser's output 2988 if isinstance(table.args.get("alias"), 
exp.TableAlias): 2989 table_as_column.replace(table_as_column.this) 2990 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2991 2992 table.replace(unnest) 2993 2994 refs.add(normalized_table.alias_or_name) 2995 2996 return this 2997 2998 def _parse_query_modifiers( 2999 self, this: t.Optional[exp.Expression] 3000 ) -> t.Optional[exp.Expression]: 3001 if isinstance(this, (exp.Query, exp.Table)): 3002 for join in self._parse_joins(): 3003 this.append("joins", join) 3004 for lateral in iter(self._parse_lateral, None): 3005 this.append("laterals", lateral) 3006 3007 while True: 3008 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3009 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3010 key, expression = parser(self) 3011 3012 if expression: 3013 this.set(key, expression) 3014 if key == "limit": 3015 offset = expression.args.pop("offset", None) 3016 3017 if offset: 3018 offset = exp.Offset(expression=offset) 3019 this.set("offset", offset) 3020 3021 limit_by_expressions = expression.expressions 3022 expression.set("expressions", None) 3023 offset.set("expressions", limit_by_expressions) 3024 continue 3025 break 3026 3027 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3028 this = self._implicit_unnests_to_explicit(this) 3029 3030 return this 3031 3032 def _parse_hint(self) -> t.Optional[exp.Hint]: 3033 if self._match(TokenType.HINT): 3034 hints = [] 3035 for hint in iter( 3036 lambda: self._parse_csv( 3037 lambda: self._parse_function() or self._parse_var(upper=True) 3038 ), 3039 [], 3040 ): 3041 hints.extend(hint) 3042 3043 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3044 self.raise_error("Expected */ after HINT") 3045 3046 return self.expression(exp.Hint, expressions=hints) 3047 3048 return None 3049 3050 def _parse_into(self) -> t.Optional[exp.Into]: 3051 if not self._match(TokenType.INTO): 3052 return None 3053 3054 temp = self._match(TokenType.TEMPORARY) 3055 unlogged = self._match_text_seq("UNLOGGED") 3056 self._match(TokenType.TABLE) 3057 3058 return self.expression( 3059 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3060 ) 3061 3062 def _parse_from( 3063 self, joins: bool = False, skip_from_token: bool = False 3064 ) -> t.Optional[exp.From]: 3065 if not skip_from_token and not self._match(TokenType.FROM): 3066 return None 3067 3068 return self.expression( 3069 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3070 ) 3071 3072 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3073 return self.expression( 3074 exp.MatchRecognizeMeasure, 3075 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3076 this=self._parse_expression(), 3077 ) 3078 3079 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3080 if not self._match(TokenType.MATCH_RECOGNIZE): 3081 return None 3082 3083 self._match_l_paren() 3084 3085 partition = self._parse_partition_by() 3086 order = self._parse_order() 3087 3088 measures = ( 3089 self._parse_csv(self._parse_match_recognize_measure) 3090 if self._match_text_seq("MEASURES") 3091 else None 3092 ) 3093 3094 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3095 rows = exp.var("ONE ROW PER MATCH") 3096 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3097 text = "ALL ROWS PER MATCH" 3098 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3099 text += " SHOW EMPTY MATCHES" 3100 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3101 text += " 
OMIT EMPTY MATCHES" 3102 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3103 text += " WITH UNMATCHED ROWS" 3104 rows = exp.var(text) 3105 else: 3106 rows = None 3107 3108 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3109 text = "AFTER MATCH SKIP" 3110 if self._match_text_seq("PAST", "LAST", "ROW"): 3111 text += " PAST LAST ROW" 3112 elif self._match_text_seq("TO", "NEXT", "ROW"): 3113 text += " TO NEXT ROW" 3114 elif self._match_text_seq("TO", "FIRST"): 3115 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3116 elif self._match_text_seq("TO", "LAST"): 3117 text += f" TO LAST {self._advance_any().text}" # type: ignore 3118 after = exp.var(text) 3119 else: 3120 after = None 3121 3122 if self._match_text_seq("PATTERN"): 3123 self._match_l_paren() 3124 3125 if not self._curr: 3126 self.raise_error("Expecting )", self._curr) 3127 3128 paren = 1 3129 start = self._curr 3130 3131 while self._curr and paren > 0: 3132 if self._curr.token_type == TokenType.L_PAREN: 3133 paren += 1 3134 if self._curr.token_type == TokenType.R_PAREN: 3135 paren -= 1 3136 3137 end = self._prev 3138 self._advance() 3139 3140 if paren > 0: 3141 self.raise_error("Expecting )", self._curr) 3142 3143 pattern = exp.var(self._find_sql(start, end)) 3144 else: 3145 pattern = None 3146 3147 define = ( 3148 self._parse_csv(self._parse_name_as_expression) 3149 if self._match_text_seq("DEFINE") 3150 else None 3151 ) 3152 3153 self._match_r_paren() 3154 3155 return self.expression( 3156 exp.MatchRecognize, 3157 partition_by=partition, 3158 order=order, 3159 measures=measures, 3160 rows=rows, 3161 after=after, 3162 pattern=pattern, 3163 define=define, 3164 alias=self._parse_table_alias(), 3165 ) 3166 3167 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3168 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3169 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3170 cross_apply = False 3171 3172 if cross_apply is not None: 3173 this = self._parse_select(table=True) 3174 view = None 3175 outer = None 3176 elif self._match(TokenType.LATERAL): 3177 this = self._parse_select(table=True) 3178 view = self._match(TokenType.VIEW) 3179 outer = self._match(TokenType.OUTER) 3180 else: 3181 return None 3182 3183 if not this: 3184 this = ( 3185 self._parse_unnest() 3186 or self._parse_function() 3187 or self._parse_id_var(any_token=False) 3188 ) 3189 3190 while self._match(TokenType.DOT): 3191 this = exp.Dot( 3192 this=this, 3193 expression=self._parse_function() or self._parse_id_var(any_token=False), 3194 ) 3195 3196 if view: 3197 table = self._parse_id_var(any_token=False) 3198 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3199 table_alias: t.Optional[exp.TableAlias] = self.expression( 3200 exp.TableAlias, this=table, columns=columns 3201 ) 3202 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3203 # We move the alias from the lateral's child node to the lateral itself 3204 table_alias = this.args["alias"].pop() 3205 else: 3206 table_alias = self._parse_table_alias() 3207 3208 return self.expression( 3209 exp.Lateral, 3210 this=this, 3211 view=view, 3212 outer=outer, 3213 alias=table_alias, 3214 cross_apply=cross_apply, 3215 ) 3216 3217 def _parse_join_parts( 3218 self, 3219 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3220 return ( 3221 self._match_set(self.JOIN_METHODS) and self._prev, 3222 self._match_set(self.JOIN_SIDES) and self._prev, 3223 self._match_set(self.JOIN_KINDS) and 
self._prev, 3224 ) 3225 3226 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3227 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3228 this = self._parse_column() 3229 if isinstance(this, exp.Column): 3230 return this.this 3231 return this 3232 3233 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3234 3235 def _parse_join( 3236 self, skip_join_token: bool = False, parse_bracket: bool = False 3237 ) -> t.Optional[exp.Join]: 3238 if self._match(TokenType.COMMA): 3239 return self.expression(exp.Join, this=self._parse_table()) 3240 3241 index = self._index 3242 method, side, kind = self._parse_join_parts() 3243 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3244 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3245 3246 if not skip_join_token and not join: 3247 self._retreat(index) 3248 kind = None 3249 method = None 3250 side = None 3251 3252 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3253 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3254 3255 if not skip_join_token and not join and not outer_apply and not cross_apply: 3256 return None 3257 3258 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3259 3260 if method: 3261 kwargs["method"] = method.text 3262 if side: 3263 kwargs["side"] = side.text 3264 if kind: 3265 kwargs["kind"] = kind.text 3266 if hint: 3267 kwargs["hint"] = hint 3268 3269 if self._match(TokenType.MATCH_CONDITION): 3270 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3271 3272 if self._match(TokenType.ON): 3273 kwargs["on"] = self._parse_assignment() 3274 elif self._match(TokenType.USING): 3275 kwargs["using"] = self._parse_using_identifiers() 3276 elif ( 3277 not (outer_apply or cross_apply) 3278 and not isinstance(kwargs["this"], exp.Unnest) 3279 and not (kind and kind.token_type == TokenType.CROSS) 3280 ): 3281 index = self._index 3282 joins: t.Optional[list] = list(self._parse_joins()) 3283 3284 if joins and self._match(TokenType.ON): 3285 kwargs["on"] = self._parse_assignment() 3286 elif joins and self._match(TokenType.USING): 3287 kwargs["using"] = self._parse_using_identifiers() 3288 else: 3289 joins = None 3290 self._retreat(index) 3291 3292 kwargs["this"].set("joins", joins if joins else None) 3293 3294 comments = [c for token in (method, side, kind) if token for c in token.comments] 3295 return self.expression(exp.Join, comments=comments, **kwargs) 3296 3297 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3298 this = self._parse_assignment() 3299 3300 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3301 return this 3302 3303 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3304 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3305 3306 return this 3307 3308 def _parse_index_params(self) -> exp.IndexParameters: 3309 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3310 3311 if self._match(TokenType.L_PAREN, advance=False): 3312 columns = self._parse_wrapped_csv(self._parse_with_operator) 3313 else: 3314 columns = None 3315 3316 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3317 partition_by = self._parse_partition_by() 3318 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3319 tablespace = ( 3320 self._parse_var(any_token=True) 3321 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3322 else None 3323 ) 3324 where = self._parse_where() 3325 3326 on = self._parse_field() if self._match(TokenType.ON) else None 3327 3328 return self.expression( 3329 exp.IndexParameters, 3330 using=using, 3331 columns=columns, 3332 include=include, 3333 partition_by=partition_by, 3334 where=where, 3335 with_storage=with_storage, 3336 tablespace=tablespace, 3337 on=on, 3338 ) 3339 3340 def _parse_index( 3341 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3342 ) -> t.Optional[exp.Index]: 3343 if index or anonymous: 3344 unique = None 3345 primary = None 3346 amp = None 3347 3348 self._match(TokenType.ON) 3349 self._match(TokenType.TABLE) # hive 3350 table = self._parse_table_parts(schema=True) 3351 else: 3352 unique = self._match(TokenType.UNIQUE) 3353 primary = self._match_text_seq("PRIMARY") 3354 amp = self._match_text_seq("AMP") 3355 3356 if not self._match(TokenType.INDEX): 3357 return None 3358 3359 index = self._parse_id_var() 3360 table = None 3361 3362 params = self._parse_index_params() 3363 3364 return self.expression( 3365 exp.Index, 3366 this=index, 3367 table=table, 3368 unique=unique, 3369 primary=primary, 3370 amp=amp, 3371 params=params, 3372 ) 3373 3374 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3375 hints: t.List[exp.Expression] = [] 3376 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3377 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3378 hints.append( 3379 self.expression( 3380 exp.WithTableHint, 3381 expressions=self._parse_csv( 3382 lambda: self._parse_function() or self._parse_var(any_token=True) 3383 ), 3384 ) 3385 ) 3386 self._match_r_paren() 3387 else: 3388 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3389 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3390 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3391 3392 self._match_set((TokenType.INDEX, TokenType.KEY)) 3393 if self._match(TokenType.FOR): 3394 hint.set("target", self._advance_any() and self._prev.text.upper()) 3395 3396 hint.set("expressions", self._parse_wrapped_id_vars()) 3397 hints.append(hint) 3398 3399 return hints or None 3400 3401 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3402 return ( 3403 (not schema and self._parse_function(optional_parens=False)) 3404 or self._parse_id_var(any_token=False) 3405 or self._parse_string_as_identifier() 3406 or self._parse_placeholder() 3407 ) 3408 3409 def _parse_table_parts( 3410 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3411 ) -> exp.Table: 3412 catalog = None 3413 db = None 3414 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3415 3416 while self._match(TokenType.DOT): 3417 if catalog: 3418 # This allows nesting the table in arbitrarily many dot expressions if needed 3419 table = self.expression( 3420 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3421 ) 3422 else: 3423 catalog = db 3424 db = table 3425 # "" used for tsql FROM a..b case 3426 table = self._parse_table_part(schema=schema) or "" 3427 3428 if ( 3429 wildcard 3430 and self._is_connected() 3431 and (isinstance(table, exp.Identifier) or not table) 3432 and self._match(TokenType.STAR) 3433 ): 3434 if isinstance(table, exp.Identifier): 3435 table.args["this"] += "*" 3436 else: 3437 table = exp.Identifier(this="*") 3438 3439 # We bubble up comments from the Identifier to the Table 
3440 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3441 3442 if is_db_reference: 3443 catalog = db 3444 db = table 3445 table = None 3446 3447 if not table and not is_db_reference: 3448 self.raise_error(f"Expected table name but got {self._curr}") 3449 if not db and is_db_reference: 3450 self.raise_error(f"Expected database name but got {self._curr}") 3451 3452 table = self.expression( 3453 exp.Table, 3454 comments=comments, 3455 this=table, 3456 db=db, 3457 catalog=catalog, 3458 ) 3459 3460 changes = self._parse_changes() 3461 if changes: 3462 table.set("changes", changes) 3463 3464 at_before = self._parse_historical_data() 3465 if at_before: 3466 table.set("when", at_before) 3467 3468 pivots = self._parse_pivots() 3469 if pivots: 3470 table.set("pivots", pivots) 3471 3472 return table 3473 3474 def _parse_table( 3475 self, 3476 schema: bool = False, 3477 joins: bool = False, 3478 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3479 parse_bracket: bool = False, 3480 is_db_reference: bool = False, 3481 parse_partition: bool = False, 3482 ) -> t.Optional[exp.Expression]: 3483 lateral = self._parse_lateral() 3484 if lateral: 3485 return lateral 3486 3487 unnest = self._parse_unnest() 3488 if unnest: 3489 return unnest 3490 3491 values = self._parse_derived_table_values() 3492 if values: 3493 return values 3494 3495 subquery = self._parse_select(table=True) 3496 if subquery: 3497 if not subquery.args.get("pivots"): 3498 subquery.set("pivots", self._parse_pivots()) 3499 return subquery 3500 3501 bracket = parse_bracket and self._parse_bracket(None) 3502 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3503 3504 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3505 self._parse_table 3506 ) 3507 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3508 3509 only = self._match(TokenType.ONLY) 3510 3511 this = t.cast( 3512 exp.Expression, 3513 bracket 3514 or rows_from 3515 or self._parse_bracket( 3516 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3517 ), 3518 ) 3519 3520 if only: 3521 this.set("only", only) 3522 3523 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3524 self._match_text_seq("*") 3525 3526 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3527 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3528 this.set("partition", self._parse_partition()) 3529 3530 if schema: 3531 return self._parse_schema(this=this) 3532 3533 version = self._parse_version() 3534 3535 if version: 3536 this.set("version", version) 3537 3538 if self.dialect.ALIAS_POST_TABLESAMPLE: 3539 table_sample = self._parse_table_sample() 3540 3541 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3542 if alias: 3543 this.set("alias", alias) 3544 3545 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3546 return self.expression( 3547 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3548 ) 3549 3550 this.set("hints", self._parse_table_hints()) 3551 3552 if not this.args.get("pivots"): 3553 this.set("pivots", self._parse_pivots()) 3554 3555 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3556 table_sample = self._parse_table_sample() 3557 3558 if table_sample: 3559 table_sample.set("this", this) 3560 this = table_sample 3561 3562 if joins: 3563 for join in self._parse_joins(): 3564 this.append("joins", join) 3565 3566 if 
self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3567 this.set("ordinality", True) 3568 this.set("alias", self._parse_table_alias()) 3569 3570 return this 3571 3572 def _parse_version(self) -> t.Optional[exp.Version]: 3573 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3574 this = "TIMESTAMP" 3575 elif self._match(TokenType.VERSION_SNAPSHOT): 3576 this = "VERSION" 3577 else: 3578 return None 3579 3580 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3581 kind = self._prev.text.upper() 3582 start = self._parse_bitwise() 3583 self._match_texts(("TO", "AND")) 3584 end = self._parse_bitwise() 3585 expression: t.Optional[exp.Expression] = self.expression( 3586 exp.Tuple, expressions=[start, end] 3587 ) 3588 elif self._match_text_seq("CONTAINED", "IN"): 3589 kind = "CONTAINED IN" 3590 expression = self.expression( 3591 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3592 ) 3593 elif self._match(TokenType.ALL): 3594 kind = "ALL" 3595 expression = None 3596 else: 3597 self._match_text_seq("AS", "OF") 3598 kind = "AS OF" 3599 expression = self._parse_type() 3600 3601 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3602 3603 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3604 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3605 index = self._index 3606 historical_data = None 3607 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3608 this = self._prev.text.upper() 3609 kind = ( 3610 self._match(TokenType.L_PAREN) 3611 and self._match_texts(self.HISTORICAL_DATA_KIND) 3612 and self._prev.text.upper() 3613 ) 3614 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3615 3616 if expression: 3617 self._match_r_paren() 3618 historical_data = self.expression( 3619 exp.HistoricalData, this=this, kind=kind, expression=expression 3620 ) 3621 else: 3622 self._retreat(index) 3623 3624 return historical_data 3625 3626 def _parse_changes(self) -> t.Optional[exp.Changes]: 3627 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3628 return None 3629 3630 information = self._parse_var(any_token=True) 3631 self._match_r_paren() 3632 3633 return self.expression( 3634 exp.Changes, 3635 information=information, 3636 at_before=self._parse_historical_data(), 3637 end=self._parse_historical_data(), 3638 ) 3639 3640 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3641 if not self._match(TokenType.UNNEST): 3642 return None 3643 3644 expressions = self._parse_wrapped_csv(self._parse_equality) 3645 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3646 3647 alias = self._parse_table_alias() if with_alias else None 3648 3649 if alias: 3650 if self.dialect.UNNEST_COLUMN_ONLY: 3651 if alias.args.get("columns"): 3652 self.raise_error("Unexpected extra column alias in unnest.") 3653 3654 alias.set("columns", [alias.this]) 3655 alias.set("this", None) 3656 3657 columns = alias.args.get("columns") or [] 3658 if offset and len(expressions) < len(columns): 3659 offset = columns.pop() 3660 3661 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3662 self._match(TokenType.ALIAS) 3663 offset = self._parse_id_var( 3664 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3665 ) or exp.to_identifier("offset") 3666 3667 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3668 3669 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3670 is_derived = self._match_pair(TokenType.L_PAREN, 
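#
# An illustrative aside (not part of the source): the UNNEST handling above can be
# exercised through the public API. In a UNNEST_COLUMN_ONLY dialect such as BigQuery,
# the trailing alias is moved into the TableAlias "columns" arg, exactly as the code
# above does (the names x and the literal array are made up for illustration):
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = "SELECT x FROM UNNEST([1, 2, 3]) AS x"
#     unnest = sqlglot.parse_one(sql, read="bigquery").find(exp.Unnest)
#     cols = unnest.args["alias"].args.get("columns") or []
#     assert [c.name for c in cols] == ["x"]
#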
TokenType.VALUES) 3671 if not is_derived and not ( 3672 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3673 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3674 ): 3675 return None 3676 3677 expressions = self._parse_csv(self._parse_value) 3678 alias = self._parse_table_alias() 3679 3680 if is_derived: 3681 self._match_r_paren() 3682 3683 return self.expression( 3684 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3685 ) 3686 3687 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3688 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3689 as_modifier and self._match_text_seq("USING", "SAMPLE") 3690 ): 3691 return None 3692 3693 bucket_numerator = None 3694 bucket_denominator = None 3695 bucket_field = None 3696 percent = None 3697 size = None 3698 seed = None 3699 3700 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3701 matched_l_paren = self._match(TokenType.L_PAREN) 3702 3703 if self.TABLESAMPLE_CSV: 3704 num = None 3705 expressions = self._parse_csv(self._parse_primary) 3706 else: 3707 expressions = None 3708 num = ( 3709 self._parse_factor() 3710 if self._match(TokenType.NUMBER, advance=False) 3711 else self._parse_primary() or self._parse_placeholder() 3712 ) 3713 3714 if self._match_text_seq("BUCKET"): 3715 bucket_numerator = self._parse_number() 3716 self._match_text_seq("OUT", "OF") 3717 bucket_denominator = self._parse_number() 3718 self._match(TokenType.ON) 3719 bucket_field = self._parse_field() 3720 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3721 percent = num 3722 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3723 size = num 3724 else: 3725 percent = num 3726 3727 if matched_l_paren: 3728 self._match_r_paren() 3729 3730 if self._match(TokenType.L_PAREN): 3731 method = self._parse_var(upper=True) 3732 seed = self._match(TokenType.COMMA) and self._parse_number() 3733 self._match_r_paren() 3734 elif self._match_texts(("SEED", "REPEATABLE")): 3735 seed = self._parse_wrapped(self._parse_number) 3736 3737 if not method and self.DEFAULT_SAMPLING_METHOD: 3738 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3739 3740 return self.expression( 3741 exp.TableSample, 3742 expressions=expressions, 3743 method=method, 3744 bucket_numerator=bucket_numerator, 3745 bucket_denominator=bucket_denominator, 3746 bucket_field=bucket_field, 3747 percent=percent, 3748 size=size, 3749 seed=seed, 3750 ) 3751 3752 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3753 return list(iter(self._parse_pivot, None)) or None 3754 3755 def _parse_joins(self) -> t.Iterator[exp.Join]: 3756 return iter(self._parse_join, None) 3757 3758 # https://duckdb.org/docs/sql/statements/pivot 3759 def _parse_simplified_pivot(self) -> exp.Pivot: 3760 def _parse_on() -> t.Optional[exp.Expression]: 3761 this = self._parse_bitwise() 3762 return self._parse_in(this) if self._match(TokenType.IN) else this 3763 3764 this = self._parse_table() 3765 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3766 using = self._match(TokenType.USING) and self._parse_csv( 3767 lambda: self._parse_alias(self._parse_function()) 3768 ) 3769 group = self._parse_group() 3770 return self.expression( 3771 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3772 ) 3773 3774 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3775 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3776 this =
self._parse_select_or_expression() 3777 3778 self._match(TokenType.ALIAS) 3779 alias = self._parse_field() 3780 if alias: 3781 return self.expression(exp.PivotAlias, this=this, alias=alias) 3782 3783 return this 3784 3785 value = self._parse_column() 3786 3787 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3788 self.raise_error("Expecting IN (") 3789 3790 if self._match(TokenType.ANY): 3791 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3792 else: 3793 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3794 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3795 3796 self._match_r_paren() 3797 return expr 3798 3799 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3800 index = self._index 3801 include_nulls = None 3802 3803 if self._match(TokenType.PIVOT): 3804 unpivot = False 3805 elif self._match(TokenType.UNPIVOT): 3806 unpivot = True 3807 3808 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3809 if self._match_text_seq("INCLUDE", "NULLS"): 3810 include_nulls = True 3811 elif self._match_text_seq("EXCLUDE", "NULLS"): 3812 include_nulls = False 3813 else: 3814 return None 3815 3816 expressions = [] 3817 3818 if not self._match(TokenType.L_PAREN): 3819 self._retreat(index) 3820 return None 3821 3822 if unpivot: 3823 expressions = self._parse_csv(self._parse_column) 3824 else: 3825 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3826 3827 if not expressions: 3828 self.raise_error("Failed to parse PIVOT's aggregation list") 3829 3830 if not self._match(TokenType.FOR): 3831 self.raise_error("Expecting FOR") 3832 3833 field = self._parse_pivot_in() 3834 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3835 self._parse_bitwise 3836 ) 3837 3838 self._match_r_paren() 3839 3840 pivot = self.expression( 3841 exp.Pivot, 3842 expressions=expressions, 3843 field=field, 3844 unpivot=unpivot, 3845 include_nulls=include_nulls, 3846 default_on_null=default_on_null, 3847 ) 3848 3849 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3850 pivot.set("alias", self._parse_table_alias()) 3851 3852 if not unpivot: 3853 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3854 3855 columns: t.List[exp.Expression] = [] 3856 for fld in pivot.args["field"].expressions: 3857 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3858 for name in names: 3859 if self.PREFIXED_PIVOT_COLUMNS: 3860 name = f"{name}_{field_name}" if name else field_name 3861 else: 3862 name = f"{field_name}_{name}" if name else field_name 3863 3864 columns.append(exp.to_identifier(name)) 3865 3866 pivot.set("columns", columns) 3867 3868 return pivot 3869 3870 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3871 return [agg.alias for agg in aggregations] 3872 3873 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3874 if not skip_where_token and not self._match(TokenType.PREWHERE): 3875 return None 3876 3877 return self.expression( 3878 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3879 ) 3880 3881 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3882 if not skip_where_token and not self._match(TokenType.WHERE): 3883 return None 3884 3885 return self.expression( 3886 exp.Where, comments=self._prev_comments, 
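#
# An illustrative aside (not part of the source): the _parse_pivot path above accepts
# the canonical PIVOT(<agg> FOR <col> IN (<values>)) form, e.g. in the Snowflake
# dialect (the table and column names t, v and k are made up for illustration):
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
#     pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
#     assert pivot is not None and not pivot.args.get("unpivot")
#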
this=self._parse_assignment() 3887 ) 3888 3889 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3890 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3891 return None 3892 3893 elements: t.Dict[str, t.Any] = defaultdict(list) 3894 3895 if self._match(TokenType.ALL): 3896 elements["all"] = True 3897 elif self._match(TokenType.DISTINCT): 3898 elements["all"] = False 3899 3900 while True: 3901 expressions = self._parse_csv( 3902 lambda: None 3903 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3904 else self._parse_assignment() 3905 ) 3906 if expressions: 3907 elements["expressions"].extend(expressions) 3908 3909 grouping_sets = self._parse_grouping_sets() 3910 if grouping_sets: 3911 elements["grouping_sets"].extend(grouping_sets) 3912 3913 rollup = None 3914 cube = None 3915 totals = None 3916 3917 index = self._index 3918 with_ = self._match(TokenType.WITH) 3919 if self._match(TokenType.ROLLUP): 3920 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3921 elements["rollup"].extend(ensure_list(rollup)) 3922 3923 if self._match(TokenType.CUBE): 3924 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3925 elements["cube"].extend(ensure_list(cube)) 3926 3927 if self._match_text_seq("TOTALS"): 3928 totals = True 3929 elements["totals"] = True # type: ignore 3930 3931 if not (grouping_sets or rollup or cube or totals): 3932 if with_: 3933 self._retreat(index) 3934 break 3935 3936 return self.expression(exp.Group, **elements) # type: ignore 3937 3938 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3939 if not self._match(TokenType.GROUPING_SETS): 3940 return None 3941 3942 return self._parse_wrapped_csv(self._parse_grouping_set) 3943 3944 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3945 if self._match(TokenType.L_PAREN): 3946 grouping_set = self._parse_csv(self._parse_column) 3947 self._match_r_paren() 3948 return self.expression(exp.Tuple, expressions=grouping_set) 3949 3950 return self._parse_column() 3951 3952 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3953 if not skip_having_token and not self._match(TokenType.HAVING): 3954 return None 3955 return self.expression(exp.Having, this=self._parse_assignment()) 3956 3957 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3958 if not self._match(TokenType.QUALIFY): 3959 return None 3960 return self.expression(exp.Qualify, this=self._parse_assignment()) 3961 3962 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3963 if skip_start_token: 3964 start = None 3965 elif self._match(TokenType.START_WITH): 3966 start = self._parse_assignment() 3967 else: 3968 return None 3969 3970 self._match(TokenType.CONNECT_BY) 3971 nocycle = self._match_text_seq("NOCYCLE") 3972 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3973 exp.Prior, this=self._parse_bitwise() 3974 ) 3975 connect = self._parse_assignment() 3976 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3977 3978 if not start and self._match(TokenType.START_WITH): 3979 start = self._parse_assignment() 3980 3981 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3982 3983 def _parse_name_as_expression(self) -> exp.Alias: 3984 return self.expression( 3985 exp.Alias, 3986 alias=self._parse_id_var(any_token=True), 3987 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3988 ) 3989 3990 def _parse_interpolate(self) -> 
t.Optional[t.List[exp.Expression]]: 3991 if self._match_text_seq("INTERPOLATE"): 3992 return self._parse_wrapped_csv(self._parse_name_as_expression) 3993 return None 3994 3995 def _parse_order( 3996 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3997 ) -> t.Optional[exp.Expression]: 3998 siblings = None 3999 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4000 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4001 return this 4002 4003 siblings = True 4004 4005 return self.expression( 4006 exp.Order, 4007 this=this, 4008 expressions=self._parse_csv(self._parse_ordered), 4009 interpolate=self._parse_interpolate(), 4010 siblings=siblings, 4011 ) 4012 4013 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4014 if not self._match(token): 4015 return None 4016 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4017 4018 def _parse_ordered( 4019 self, parse_method: t.Optional[t.Callable] = None 4020 ) -> t.Optional[exp.Ordered]: 4021 this = parse_method() if parse_method else self._parse_assignment() 4022 if not this: 4023 return None 4024 4025 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4026 this = exp.var("ALL") 4027 4028 asc = self._match(TokenType.ASC) 4029 desc = self._match(TokenType.DESC) or (asc and False) 4030 4031 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4032 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4033 4034 nulls_first = is_nulls_first or False 4035 explicitly_null_ordered = is_nulls_first or is_nulls_last 4036 4037 if ( 4038 not explicitly_null_ordered 4039 and ( 4040 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4041 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4042 ) 4043 and self.dialect.NULL_ORDERING != "nulls_are_last" 4044 ): 4045 nulls_first = True 4046 4047 if self._match_text_seq("WITH", "FILL"): 4048 with_fill = self.expression( 4049 exp.WithFill, 4050 **{ # type: ignore 4051 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4052 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4053 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4054 }, 4055 ) 4056 else: 4057 with_fill = None 4058 4059 return self.expression( 4060 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4061 ) 4062 4063 def _parse_limit( 4064 self, 4065 this: t.Optional[exp.Expression] = None, 4066 top: bool = False, 4067 skip_limit_token: bool = False, 4068 ) -> t.Optional[exp.Expression]: 4069 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4070 comments = self._prev_comments 4071 if top: 4072 limit_paren = self._match(TokenType.L_PAREN) 4073 expression = self._parse_term() if limit_paren else self._parse_number() 4074 4075 if limit_paren: 4076 self._match_r_paren() 4077 else: 4078 expression = self._parse_term() 4079 4080 if self._match(TokenType.COMMA): 4081 offset = expression 4082 expression = self._parse_term() 4083 else: 4084 offset = None 4085 4086 limit_exp = self.expression( 4087 exp.Limit, 4088 this=this, 4089 expression=expression, 4090 offset=offset, 4091 comments=comments, 4092 expressions=self._parse_limit_by(), 4093 ) 4094 4095 return limit_exp 4096 4097 if self._match(TokenType.FETCH): 4098 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4099 direction = self._prev.text.upper() if direction else "FIRST" 4100 4101 count = self._parse_field(tokens=self.FETCH_TOKENS) 4102 percent = 
self._match(TokenType.PERCENT) 4103 4104 self._match_set((TokenType.ROW, TokenType.ROWS)) 4105 4106 only = self._match_text_seq("ONLY") 4107 with_ties = self._match_text_seq("WITH", "TIES") 4108 4109 if only and with_ties: 4110 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4111 4112 return self.expression( 4113 exp.Fetch, 4114 direction=direction, 4115 count=count, 4116 percent=percent, 4117 with_ties=with_ties, 4118 ) 4119 4120 return this 4121 4122 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4123 if not self._match(TokenType.OFFSET): 4124 return this 4125 4126 count = self._parse_term() 4127 self._match_set((TokenType.ROW, TokenType.ROWS)) 4128 4129 return self.expression( 4130 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4131 ) 4132 4133 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4134 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4135 4136 def _parse_locks(self) -> t.List[exp.Lock]: 4137 locks = [] 4138 while True: 4139 if self._match_text_seq("FOR", "UPDATE"): 4140 update = True 4141 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4142 "LOCK", "IN", "SHARE", "MODE" 4143 ): 4144 update = False 4145 else: 4146 break 4147 4148 expressions = None 4149 if self._match_text_seq("OF"): 4150 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4151 4152 wait: t.Optional[bool | exp.Expression] = None 4153 if self._match_text_seq("NOWAIT"): 4154 wait = True 4155 elif self._match_text_seq("WAIT"): 4156 wait = self._parse_primary() 4157 elif self._match_text_seq("SKIP", "LOCKED"): 4158 wait = False 4159 4160 locks.append( 4161 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4162 ) 4163 4164 return locks 4165 4166 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4167 while this and self._match_set(self.SET_OPERATIONS): 4168 token_type = self._prev.token_type 4169 4170 if token_type == TokenType.UNION: 4171 operation: t.Type[exp.SetOperation] = exp.Union 4172 elif token_type == TokenType.EXCEPT: 4173 operation = exp.Except 4174 else: 4175 operation = exp.Intersect 4176 4177 comments = self._prev.comments 4178 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4179 by_name = self._match_text_seq("BY", "NAME") 4180 expression = self._parse_select(nested=True, parse_set_operation=False) 4181 4182 this = self.expression( 4183 operation, 4184 comments=comments, 4185 this=this, 4186 distinct=distinct, 4187 by_name=by_name, 4188 expression=expression, 4189 ) 4190 4191 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4192 expression = this.expression 4193 4194 if expression: 4195 for arg in self.SET_OP_MODIFIERS: 4196 expr = expression.args.get(arg) 4197 if expr: 4198 this.set(arg, expr.pop()) 4199 4200 return this 4201 4202 def _parse_expression(self) -> t.Optional[exp.Expression]: 4203 return self._parse_alias(self._parse_assignment()) 4204 4205 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4206 this = self._parse_disjunction() 4207 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4208 # This allows us to parse <non-identifier token> := <expr> 4209 this = exp.column( 4210 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4211 ) 4212 4213 while self._match_set(self.ASSIGNMENT): 4214 this = self.expression( 4215 
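#
# An illustrative aside (not part of the source): _parse_set_operations above marks a
# bare UNION as distinct and UNION ALL as not, which is observable through the public
# API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
#     union_all = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
#     assert isinstance(union, exp.Union) and union.args["distinct"]
#     assert isinstance(union_all, exp.Union) and not union_all.args["distinct"]
#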
self.ASSIGNMENT[self._prev.token_type], 4216 this=this, 4217 comments=self._prev_comments, 4218 expression=self._parse_assignment(), 4219 ) 4220 4221 return this 4222 4223 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4224 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4225 4226 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4227 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4228 4229 def _parse_equality(self) -> t.Optional[exp.Expression]: 4230 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4231 4232 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4233 return self._parse_tokens(self._parse_range, self.COMPARISON) 4234 4235 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4236 this = this or self._parse_bitwise() 4237 negate = self._match(TokenType.NOT) 4238 4239 if self._match_set(self.RANGE_PARSERS): 4240 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4241 if not expression: 4242 return this 4243 4244 this = expression 4245 elif self._match(TokenType.ISNULL): 4246 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4247 4248 # Postgres supports ISNULL and NOTNULL for conditions. 4249 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4250 if self._match(TokenType.NOTNULL): 4251 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4252 this = self.expression(exp.Not, this=this) 4253 4254 if negate: 4255 this = self._negate_range(this) 4256 4257 if self._match(TokenType.IS): 4258 this = self._parse_is(this) 4259 4260 return this 4261 4262 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4263 if not this: 4264 return this 4265 4266 return self.expression(exp.Not, this=this) 4267 4268 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4269 index = self._index - 1 4270 negate = self._match(TokenType.NOT) 4271 4272 if self._match_text_seq("DISTINCT", "FROM"): 4273 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4274 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4275 4276 expression = self._parse_null() or self._parse_boolean() 4277 if not expression: 4278 self._retreat(index) 4279 return None 4280 4281 this = self.expression(exp.Is, this=this, expression=expression) 4282 return self.expression(exp.Not, this=this) if negate else this 4283 4284 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4285 unnest = self._parse_unnest(with_alias=False) 4286 if unnest: 4287 this = self.expression(exp.In, this=this, unnest=unnest) 4288 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4289 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4290 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4291 4292 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4293 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4294 else: 4295 this = self.expression(exp.In, this=this, expressions=expressions) 4296 4297 if matched_l_paren: 4298 self._match_r_paren(this) 4299 elif not self._match(TokenType.R_BRACKET, expression=this): 4300 self.raise_error("Expecting ]") 4301 else: 4302 this = self.expression(exp.In, this=this, field=self._parse_field()) 4303 4304 return this 4305 4306 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4307 low = 
self._parse_bitwise() 4308 self._match(TokenType.AND) 4309 high = self._parse_bitwise() 4310 return self.expression(exp.Between, this=this, low=low, high=high) 4311 4312 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4313 if not self._match(TokenType.ESCAPE): 4314 return this 4315 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4316 4317 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4318 index = self._index 4319 4320 if not self._match(TokenType.INTERVAL) and match_interval: 4321 return None 4322 4323 if self._match(TokenType.STRING, advance=False): 4324 this = self._parse_primary() 4325 else: 4326 this = self._parse_term() 4327 4328 if not this or ( 4329 isinstance(this, exp.Column) 4330 and not this.table 4331 and not this.this.quoted 4332 and this.name.upper() == "IS" 4333 ): 4334 self._retreat(index) 4335 return None 4336 4337 unit = self._parse_function() or ( 4338 not self._match(TokenType.ALIAS, advance=False) 4339 and self._parse_var(any_token=True, upper=True) 4340 ) 4341 4342 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4343 # each INTERVAL expression into this canonical form so it's easy to transpile 4344 if this and this.is_number: 4345 this = exp.Literal.string(this.to_py()) 4346 elif this and this.is_string: 4347 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4348 if len(parts) == 1: 4349 if unit: 4350 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4351 self._retreat(self._index - 1) 4352 4353 this = exp.Literal.string(parts[0][0]) 4354 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4355 4356 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4357 unit = self.expression( 4358 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4359 ) 4360 4361 interval = self.expression(exp.Interval, this=this, unit=unit) 4362 4363 index = self._index 4364 self._match(TokenType.PLUS) 4365 4366 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4367 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4368 return self.expression( 4369 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4370 ) 4371 4372 self._retreat(index) 4373 return interval 4374 4375 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4376 this = self._parse_term() 4377 4378 while True: 4379 if self._match_set(self.BITWISE): 4380 this = self.expression( 4381 self.BITWISE[self._prev.token_type], 4382 this=this, 4383 expression=self._parse_term(), 4384 ) 4385 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4386 this = self.expression( 4387 exp.DPipe, 4388 this=this, 4389 expression=self._parse_term(), 4390 safe=not self.dialect.STRICT_STRING_CONCAT, 4391 ) 4392 elif self._match(TokenType.DQMARK): 4393 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4394 elif self._match_pair(TokenType.LT, TokenType.LT): 4395 this = self.expression( 4396 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4397 ) 4398 elif self._match_pair(TokenType.GT, TokenType.GT): 4399 this = self.expression( 4400 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4401 ) 4402 else: 4403 break 4404 4405 return this 4406 4407 def _parse_term(self) -> t.Optional[exp.Expression]: 4408 this = self._parse_factor() 4409 4410 while self._match_set(self.TERM): 4411 klass = self.TERM[self._prev.token_type] 4412 comments = self._prev_comments 4413 expression = self._parse_factor() 4414 4415 this = self.expression(klass, this=this, comments=comments, expression=expression) 4416 4417 if isinstance(this, exp.Collate): 4418 expr = this.expression 4419 4420 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4421 # fallback to Identifier / Var 4422 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4423 ident = expr.this 4424 if isinstance(ident, exp.Identifier): 4425 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4426 4427 return this 4428 4429 def _parse_factor(self) -> t.Optional[exp.Expression]: 4430 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4431 this = parse_method() 4432 4433 while self._match_set(self.FACTOR): 4434 klass = self.FACTOR[self._prev.token_type] 4435 comments = self._prev_comments 4436 expression = parse_method() 4437 4438 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4439 self._retreat(self._index - 1) 4440 return this 4441 4442 this = self.expression(klass, this=this, comments=comments, expression=expression) 4443 4444 if isinstance(this, exp.Div): 4445 this.args["typed"] = self.dialect.TYPED_DIVISION 4446 this.args["safe"] = self.dialect.SAFE_DIVISION 4447 4448 return this 4449 4450 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4451 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4452 4453 def _parse_unary(self) -> t.Optional[exp.Expression]: 4454 if self._match_set(self.UNARY_PARSERS): 4455 return self.UNARY_PARSERS[self._prev.token_type](self) 4456 return self._parse_at_time_zone(self._parse_type()) 4457 4458 def _parse_type( 4459 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4460 ) -> t.Optional[exp.Expression]: 4461 interval = parse_interval and self._parse_interval() 4462 if interval: 4463 return interval 4464 4465 index = self._index 4466 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4467 4468 # parse_types() 
returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4469 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4470 if isinstance(data_type, exp.Cast): 4471 # This constructor can contain ops directly after it, for instance struct unnesting: 4472 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4473 return self._parse_column_ops(data_type) 4474 4475 if data_type: 4476 index2 = self._index 4477 this = self._parse_primary() 4478 4479 if isinstance(this, exp.Literal): 4480 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4481 if parser: 4482 return parser(self, this, data_type) 4483 4484 return self.expression(exp.Cast, this=this, to=data_type) 4485 4486 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4487 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4488 # 4489 # If the index difference here is greater than 1, that means the parser itself must have 4490 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4491 # 4492 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4493 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4494 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4495 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4496 # 4497 # In these cases, we don't really want to return the converted type, but instead retreat 4498 # and try to parse a Column or Identifier in the section below. 4499 if data_type.expressions and index2 - index > 1: 4500 self._retreat(index2) 4501 return self._parse_column_ops(data_type) 4502 4503 self._retreat(index) 4504 4505 if fallback_to_identifier: 4506 return self._parse_id_var() 4507 4508 this = self._parse_column() 4509 return this and self._parse_column_ops(this) 4510 4511 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4512 this = self._parse_type() 4513 if not this: 4514 return None 4515 4516 if isinstance(this, exp.Column) and not this.table: 4517 this = exp.var(this.name.upper()) 4518 4519 return self.expression( 4520 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4521 ) 4522 4523 def _parse_types( 4524 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4525 ) -> t.Optional[exp.Expression]: 4526 index = self._index 4527 4528 this: t.Optional[exp.Expression] = None 4529 prefix = self._match_text_seq("SYSUDTLIB", ".") 4530 4531 if not self._match_set(self.TYPE_TOKENS): 4532 identifier = allow_identifiers and self._parse_id_var( 4533 any_token=False, tokens=(TokenType.VAR,) 4534 ) 4535 if isinstance(identifier, exp.Identifier): 4536 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4537 4538 if len(tokens) != 1: 4539 self.raise_error("Unexpected identifier", self._prev) 4540 4541 if tokens[0].token_type in self.TYPE_TOKENS: 4542 self._prev = tokens[0] 4543 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4544 type_name = identifier.name 4545 4546 while self._match(TokenType.DOT): 4547 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4548 4549 this = exp.DataType.build(type_name, udt=True) 4550 else: 4551 self._retreat(self._index - 1) 4552 return None 4553 else: 4554 return None 4555 4556 type_token = self._prev.token_type 4557 4558 if type_token == TokenType.PSEUDO_TYPE: 4559 return
self.expression(exp.PseudoType, this=self._prev.text.upper()) 4560 4561 if type_token == TokenType.OBJECT_IDENTIFIER: 4562 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4563 4564 # https://materialize.com/docs/sql/types/map/ 4565 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4566 key_type = self._parse_types( 4567 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4568 ) 4569 if not self._match(TokenType.FARROW): 4570 self._retreat(index) 4571 return None 4572 4573 value_type = self._parse_types( 4574 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4575 ) 4576 if not self._match(TokenType.R_BRACKET): 4577 self._retreat(index) 4578 return None 4579 4580 return exp.DataType( 4581 this=exp.DataType.Type.MAP, 4582 expressions=[key_type, value_type], 4583 nested=True, 4584 prefix=prefix, 4585 ) 4586 4587 nested = type_token in self.NESTED_TYPE_TOKENS 4588 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4589 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4590 expressions = None 4591 maybe_func = False 4592 4593 if self._match(TokenType.L_PAREN): 4594 if is_struct: 4595 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4596 elif nested: 4597 expressions = self._parse_csv( 4598 lambda: self._parse_types( 4599 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4600 ) 4601 ) 4602 elif type_token in self.ENUM_TYPE_TOKENS: 4603 expressions = self._parse_csv(self._parse_equality) 4604 elif is_aggregate: 4605 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4606 any_token=False, tokens=(TokenType.VAR,) 4607 ) 4608 if not func_or_ident or not self._match(TokenType.COMMA): 4609 return None 4610 expressions = self._parse_csv( 4611 lambda: self._parse_types( 4612 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4613 ) 4614 ) 4615 expressions.insert(0, func_or_ident) 4616 else: 4617 expressions = self._parse_csv(self._parse_type_size) 4618 4619 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4620 if type_token == TokenType.VECTOR and len(expressions) == 2: 4621 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4622 4623 if not expressions or not self._match(TokenType.R_PAREN): 4624 self._retreat(index) 4625 return None 4626 4627 maybe_func = True 4628 4629 values: t.Optional[t.List[exp.Expression]] = None 4630 4631 if nested and self._match(TokenType.LT): 4632 if is_struct: 4633 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4634 else: 4635 expressions = self._parse_csv( 4636 lambda: self._parse_types( 4637 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4638 ) 4639 ) 4640 4641 if not self._match(TokenType.GT): 4642 self.raise_error("Expecting >") 4643 4644 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4645 values = self._parse_csv(self._parse_assignment) 4646 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4647 4648 if type_token in self.TIMESTAMPS: 4649 if self._match_text_seq("WITH", "TIME", "ZONE"): 4650 maybe_func = False 4651 tz_type = ( 4652 exp.DataType.Type.TIMETZ 4653 if type_token in self.TIMES 4654 else exp.DataType.Type.TIMESTAMPTZ 4655 ) 4656 this = exp.DataType(this=tz_type, expressions=expressions) 4657 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4658 maybe_func = False 4659 this = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4660 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4661 maybe_func = False 4662 elif type_token == TokenType.INTERVAL: 4663 unit = self._parse_var(upper=True) 4664 if unit: 4665 if self._match_text_seq("TO"): 4666 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4667 4668 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4669 else: 4670 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4671 4672 if maybe_func and check_func: 4673 index2 = self._index 4674 peek = self._parse_string() 4675 4676 if not peek: 4677 self._retreat(index) 4678 return None 4679 4680 self._retreat(index2) 4681 4682 if not this: 4683 if self._match_text_seq("UNSIGNED"): 4684 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4685 if not unsigned_type_token: 4686 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4687 4688 type_token = unsigned_type_token or type_token 4689 4690 this = exp.DataType( 4691 this=exp.DataType.Type[type_token.value], 4692 expressions=expressions, 4693 nested=nested, 4694 prefix=prefix, 4695 ) 4696 4697 # Empty arrays/structs are allowed 4698 if values is not None: 4699 cls = exp.Struct if is_struct else exp.Array 4700 this = exp.cast(cls(expressions=values), this, copy=False) 4701 4702 elif expressions: 4703 this.set("expressions", expressions) 4704 4705 # https://materialize.com/docs/sql/types/list/#type-name 4706 while self._match(TokenType.LIST): 4707 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4708 4709 index = self._index 4710 4711 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4712 matched_array = self._match(TokenType.ARRAY) 4713 4714 while self._curr: 4715 datatype_token = self._prev.token_type 4716 matched_l_bracket = self._match(TokenType.L_BRACKET) 4717 if not matched_l_bracket and not matched_array: 4718 break 4719 4720 matched_array = False 4721 values = self._parse_csv(self._parse_assignment) or None 4722 if ( 4723 values 4724 and not schema 4725 and ( 4726 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4727 ) 4728 ): 4729 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4730 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4731 self._retreat(index) 4732 break 4733 4734 this = exp.DataType( 4735 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4736 ) 4737 self._match(TokenType.R_BRACKET) 4738 4739 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4740 converter = self.TYPE_CONVERTERS.get(this.this) 4741 if converter: 4742 this = converter(t.cast(exp.DataType, this)) 4743 4744 return this 4745 4746 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4747 index = self._index 4748 4749 if ( 4750 self._curr 4751 and self._next 4752 and self._curr.token_type in self.TYPE_TOKENS 4753 and self._next.token_type in self.TYPE_TOKENS 4754 ): 4755 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4756 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4757 this = self._parse_id_var() 4758 else: 4759 this = ( 4760 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4761 or self._parse_id_var() 4762 ) 4763 4764 self._match(TokenType.COLON) 4765 4766 if ( 4767 type_required 4768 and not isinstance(this, exp.DataType) 4769 and not self._match_set(self.TYPE_TOKENS, advance=False) 4770 ): 4771 self._retreat(index) 4772 return self._parse_types() 4773 4774 return self._parse_column_def(this) 4775 4776 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4777 if not self._match_text_seq("AT", "TIME", "ZONE"): 4778 return this 4779 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4780 4781 def _parse_column(self) -> t.Optional[exp.Expression]: 4782 this = self._parse_column_reference() 4783 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4784 4785 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4786 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4787 4788 return column 4789 4790 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4791 this = self._parse_field() 4792 if ( 4793 not this 4794 and self._match(TokenType.VALUES, advance=False) 4795 and self.VALUES_FOLLOWED_BY_PAREN 4796 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4797 ): 4798 this = self._parse_id_var() 4799 4800 if isinstance(this, exp.Identifier): 4801 # We bubble up comments from the Identifier to the Column 4802 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4803 4804 return this 4805 4806 def _parse_colon_as_variant_extract( 4807 self, this: t.Optional[exp.Expression] 4808 ) -> t.Optional[exp.Expression]: 4809 casts = [] 4810 json_path = [] 4811 4812 while self._match(TokenType.COLON): 4813 start_index = self._index 4814 4815 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4816 path = self._parse_column_ops( 4817 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4818 ) 4819 4820 # The cast :: operator has a lower precedence than the extraction operator :, so 4821 # we rearrange the AST appropriately to avoid casting the JSON path 4822 while isinstance(path, exp.Cast): 4823 casts.append(path.to) 4824 path = path.this 4825 4826 if casts: 4827 dcolon_offset = next( 4828 i 4829 for i, t in enumerate(self._tokens[start_index:]) 4830 if t.token_type == TokenType.DCOLON 4831 ) 4832 end_token = self._tokens[start_index + dcolon_offset - 1] 4833 else: 4834 end_token = self._prev 4835 4836 if path: 4837 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4838 4839 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4840 # Databricks transforms it back to the colon/dot notation 4841 if json_path: 4842 this = self.expression( 4843 exp.JSONExtract, 4844 this=this, 4845 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4846 variant_extract=True, 4847 ) 4848 4849 while casts: 4850 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4851 4852 return this 4853 4854 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4855 return self._parse_types() 4856 4857 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4858 this = self._parse_bracket(this) 4859 4860 while 
self._match_set(self.COLUMN_OPERATORS): 4861 op_token = self._prev.token_type 4862 op = self.COLUMN_OPERATORS.get(op_token) 4863 4864 if op_token == TokenType.DCOLON: 4865 field = self._parse_dcolon() 4866 if not field: 4867 self.raise_error("Expected type") 4868 elif op and self._curr: 4869 field = self._parse_column_reference() 4870 else: 4871 field = self._parse_field(any_token=True, anonymous_func=True) 4872 4873 if isinstance(field, exp.Func) and this: 4874 # bigquery allows function calls like x.y.count(...) 4875 # SAFE.SUBSTR(...) 4876 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4877 this = exp.replace_tree( 4878 this, 4879 lambda n: ( 4880 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4881 if n.table 4882 else n.this 4883 ) 4884 if isinstance(n, exp.Column) 4885 else n, 4886 ) 4887 4888 if op: 4889 this = op(self, this, field) 4890 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4891 this = self.expression( 4892 exp.Column, 4893 this=field, 4894 table=this.this, 4895 db=this.args.get("table"), 4896 catalog=this.args.get("db"), 4897 ) 4898 else: 4899 this = self.expression(exp.Dot, this=this, expression=field) 4900 4901 this = self._parse_bracket(this) 4902 4903 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4904 4905 def _parse_primary(self) -> t.Optional[exp.Expression]: 4906 if self._match_set(self.PRIMARY_PARSERS): 4907 token_type = self._prev.token_type 4908 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4909 4910 if token_type == TokenType.STRING: 4911 expressions = [primary] 4912 while self._match(TokenType.STRING): 4913 expressions.append(exp.Literal.string(self._prev.text)) 4914 4915 if len(expressions) > 1: 4916 return self.expression(exp.Concat, expressions=expressions) 4917 4918 return primary 4919 4920 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4921 return exp.Literal.number(f"0.{self._prev.text}") 4922 4923 if self._match(TokenType.L_PAREN): 4924 comments = self._prev_comments 4925 query = self._parse_select() 4926 4927 if query: 4928 expressions = [query] 4929 else: 4930 expressions = self._parse_expressions() 4931 4932 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4933 4934 if not this and self._match(TokenType.R_PAREN, advance=False): 4935 this = self.expression(exp.Tuple) 4936 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4937 this = self._parse_subquery(this=this, parse_alias=False) 4938 elif isinstance(this, exp.Subquery): 4939 this = self._parse_subquery( 4940 this=self._parse_set_operations(this), parse_alias=False 4941 ) 4942 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4943 this = self.expression(exp.Tuple, expressions=expressions) 4944 else: 4945 this = self.expression(exp.Paren, this=this) 4946 4947 if this: 4948 this.add_comments(comments) 4949 4950 self._match_r_paren(expression=this) 4951 return this 4952 4953 return None 4954 4955 def _parse_field( 4956 self, 4957 any_token: bool = False, 4958 tokens: t.Optional[t.Collection[TokenType]] = None, 4959 anonymous_func: bool = False, 4960 ) -> t.Optional[exp.Expression]: 4961 if anonymous_func: 4962 field = ( 4963 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4964 or self._parse_primary() 4965 ) 4966 else: 4967 field = self._parse_primary() or self._parse_function( 4968 anonymous=anonymous_func, any_token=any_token 4969 ) 4970 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 4971 4972 def _parse_function( 4973 self, 4974 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4975 anonymous: bool = False, 4976 optional_parens: bool = True, 4977 any_token: bool = False, 4978 ) -> t.Optional[exp.Expression]: 4979 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4980 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4981 fn_syntax = False 4982 if ( 4983 self._match(TokenType.L_BRACE, advance=False) 4984 and self._next 4985 and self._next.text.upper() == "FN" 4986 ): 4987 self._advance(2) 4988 fn_syntax = True 4989 4990 func = self._parse_function_call( 4991 functions=functions, 4992 anonymous=anonymous, 4993 optional_parens=optional_parens, 4994 any_token=any_token, 4995 ) 4996 4997 if fn_syntax: 4998 self._match(TokenType.R_BRACE) 4999 5000 return func 5001 5002 def _parse_function_call( 5003 self, 5004 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5005 anonymous: bool = False, 5006 optional_parens: bool = True, 5007 any_token: bool = False, 5008 ) -> t.Optional[exp.Expression]: 5009 if not self._curr: 5010 return None 5011 5012 comments = self._curr.comments 5013 token_type = self._curr.token_type 5014 this = self._curr.text 5015 upper = this.upper() 5016 5017 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5018 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5019 self._advance() 5020 return self._parse_window(parser(self)) 5021 5022 if not self._next or self._next.token_type != TokenType.L_PAREN: 5023 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5024 self._advance() 5025 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5026 5027 return None 5028 5029 if any_token: 5030 if token_type in self.RESERVED_TOKENS: 5031 return None 5032 elif token_type not in self.FUNC_TOKENS: 5033 return None 5034 5035 self._advance(2) 5036 5037 parser = self.FUNCTION_PARSERS.get(upper) 5038 if parser and not anonymous: 5039 this = parser(self) 5040 else: 5041 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5042 5043 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5044 this = self.expression(subquery_predicate, this=self._parse_select()) 5045 self._match_r_paren() 5046 return this 5047 5048 if functions is None: 5049 functions = self.FUNCTIONS 5050 5051 function = functions.get(upper) 5052 5053 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5054 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5055 5056 if alias: 5057 args = self._kv_to_prop_eq(args) 5058 5059 if function and not anonymous: 5060 if "dialect" in function.__code__.co_varnames: 5061 func = function(args, dialect=self.dialect) 5062 else: 5063 func = function(args) 5064 5065 func = self.validate_expression(func, args) 5066 if not self.dialect.NORMALIZE_FUNCTIONS: 5067 func.meta["name"] = this 5068 5069 this = func 5070 else: 5071 if token_type == TokenType.IDENTIFIER: 5072 this = exp.Identifier(this=this, quoted=True) 5073 this = self.expression(exp.Anonymous, this=this, expressions=args) 5074 5075 if isinstance(this, exp.Expression): 5076 this.add_comments(comments) 5077 5078 self._match_r_paren(this) 5079 return self._parse_window(this) 5080 5081 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5082 transformed = [] 5083 5084 for e in expressions: 5085 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5086 if isinstance(e, 
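#
# An illustrative aside (not part of the source): _parse_function_call above builds a
# registered expression type for known function names and falls back to exp.Anonymous
# for unknown ones, which the public API makes easy to see (MY_UDF is a made-up name):
#
#     import sqlglot
#     from sqlglot import exp
#
#     known = sqlglot.parse_one("SELECT COALESCE(a, b)").find(exp.Coalesce)
#     unknown = sqlglot.parse_one("SELECT MY_UDF(a, b)").find(exp.Anonymous)
#     assert known is not None and unknown is not None
#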
exp.Alias): 5087 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5088 5089 if not isinstance(e, exp.PropertyEQ): 5090 e = self.expression( 5091 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5092 ) 5093 5094 if isinstance(e.this, exp.Column): 5095 e.this.replace(e.this.this) 5096 5097 transformed.append(e) 5098 5099 return transformed 5100 5101 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5102 return self._parse_column_def(self._parse_id_var()) 5103 5104 def _parse_user_defined_function( 5105 self, kind: t.Optional[TokenType] = None 5106 ) -> t.Optional[exp.Expression]: 5107 this = self._parse_id_var() 5108 5109 while self._match(TokenType.DOT): 5110 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5111 5112 if not self._match(TokenType.L_PAREN): 5113 return this 5114 5115 expressions = self._parse_csv(self._parse_function_parameter) 5116 self._match_r_paren() 5117 return self.expression( 5118 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5119 ) 5120 5121 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5122 literal = self._parse_primary() 5123 if literal: 5124 return self.expression(exp.Introducer, this=token.text, expression=literal) 5125 5126 return self.expression(exp.Identifier, this=token.text) 5127 5128 def _parse_session_parameter(self) -> exp.SessionParameter: 5129 kind = None 5130 this = self._parse_id_var() or self._parse_primary() 5131 5132 if this and self._match(TokenType.DOT): 5133 kind = this.name 5134 this = self._parse_var() or self._parse_primary() 5135 5136 return self.expression(exp.SessionParameter, this=this, kind=kind) 5137 5138 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5139 return self._parse_id_var() 5140 5141 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5142 index = self._index 5143 5144 if self._match(TokenType.L_PAREN): 5145 expressions = t.cast( 5146 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5147 ) 5148 5149 if not self._match(TokenType.R_PAREN): 5150 self._retreat(index) 5151 else: 5152 expressions = [self._parse_lambda_arg()] 5153 5154 if self._match_set(self.LAMBDAS): 5155 return self.LAMBDAS[self._prev.token_type](self, expressions) 5156 5157 self._retreat(index) 5158 5159 this: t.Optional[exp.Expression] 5160 5161 if self._match(TokenType.DISTINCT): 5162 this = self.expression( 5163 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5164 ) 5165 else: 5166 this = self._parse_select_or_expression(alias=alias) 5167 5168 return self._parse_limit( 5169 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5170 ) 5171 5172 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5173 index = self._index 5174 if not self._match(TokenType.L_PAREN): 5175 return this 5176 5177 # Disambiguate between schema and subquery/CTE, e.g. 
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
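    # Illustrative sketch (not in the original source): _parse_column_def above
    # covers computed columns, e.g. for T-SQL one would expect roughly
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (a INT, b AS (a + 1) PERSISTED)", read="tsql")
    #
    # to attach an exp.ComputedColumnConstraint with persisted=True to column b.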
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
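    # Illustrative sketch (not in the original source): _parse_generated_as_identity
    # handles both flavors of the standard syntax, e.g.
    #
    #   id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 10)
    #
    # is expected to yield exp.GeneratedAsIdentityColumnConstraint with this=False
    # (BY DEFAULT), start=1 and increment=10, while GENERATED ALWAYS sets this=True.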
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
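    # Illustrative sketch (not in the original source): _parse_foreign_key collects
    # the referential actions into keyword arguments, so a definition like
    #
    #   FOREIGN KEY (x) REFERENCES t (id) ON DELETE CASCADE ON UPDATE SET NULL
    #
    # is expected to produce exp.ForeignKey(..., delete="CASCADE", update="SET NULL").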
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())

        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
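    # Illustrative sketch (not in the original source): _parse_bracket distinguishes
    # DuckDB-style struct literals from subscripts, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT {'a': 1}", read="duckdb")   # struct -> exp.Struct
    #   sqlglot.parse_one("SELECT xs[1]", read="duckdb")      # subscript -> exp.Bracket
    #
    # with subscript indices shifted by the dialect's INDEX_OFFSET.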
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
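    # Illustrative sketch (not in the original source): when a FORMAT clause follows
    # a temporal type, _parse_cast rewrites the cast into a string-to-date/time
    # conversion, e.g. for Teradata one would expect something like
    #
    #   CAST(x AS DATE FORMAT 'dd.mm.yyyy')
    #
    # to parse into exp.StrToDate with the format translated via FORMAT_MAPPING.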
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
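    # Illustrative sketch (not in the original source): because _parse_string_agg
    # folds both syntaxes into exp.GroupConcat, transpilation should work roughly as
    #
    #   import sqlglot
    #   sqlglot.transpile(
    #       "SELECT STRING_AGG(x, ',' ORDER BY x) FROM t", read="postgres", write="mysql"
    #   )
    #   # expected output along the lines of:
    #   # ["SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t"]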
    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None

        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
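    # Illustrative sketch (not in the original source): the CASE produced by
    # _parse_decode makes DECODE portable across dialects, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')", read="oracle").sql()
    #   # expected: "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END"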
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
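    # Illustrative sketch (not in the original source): _parse_match_against is
    # invoked after the opening "MATCH(" tokens, so MySQL full-text predicates like
    #
    #   MATCH (title, body) AGAINST ('query' IN BOOLEAN MODE)
    #
    # are expected to parse into exp.MatchAgainst(modifier="IN BOOLEAN MODE").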
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())

            if self._match(TokenType.FOR):
                if len(args) == 1:
                    args.append(exp.Literal.number(1))

                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
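    # Illustrative sketch (not in the original source): _parse_trim above normalizes
    # the operand order of the standard syntax, so
    #
    #   TRIM(LEADING 'x' FROM y)
    #
    # is expected to yield exp.Trim(this=y, expression='x', position="LEADING"),
    # i.e. the string being trimmed always ends up in `this`.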
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
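    # Illustrative sketch (not in the original source): together with the rewrite in
    # _parse_window below, both accepted placements of IGNORE NULLS, e.g.
    #
    #   FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y)
    #   FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y)
    #
    # are expected to normalize to exp.IgnoreNulls wrapping the aggregate function.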
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
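    # Illustrative sketch (not in the original source): a frame clause such as
    #
    #   SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
    #
    # is expected to parse into exp.WindowSpec(kind="ROWS", start="UNBOUNDED",
    # start_side="PRECEDING", end="CURRENT ROW") attached to the exp.Window node.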
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")

        parse_result = parse_method()

        if wrapped:
            self._match_r_paren()

        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
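    # Illustrative sketch (not in the original source): _parse_csv, _parse_wrapped
    # and _parse_wrapped_csv are the small combinators most parsers above build on;
    # for example, _parse_wrapped_id_vars() applied to the input "(a, b, c)" is
    # expected to return the three exp.Identifier nodes, consuming both parens.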
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
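    # Illustrative sketch (not in the original source): _parse_add_column also picks
    # up the Databricks/MySQL column-position suffix, so
    #
    #   ALTER TABLE t ADD COLUMN IF NOT EXISTS c INT AFTER b
    #
    # is expected to produce a ColumnDef whose "position" arg is an exp.ColumnPosition.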
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
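    # Illustrative sketch (not in the original source): _parse_merge delegates the
    # WHEN branches to _parse_when_matched, so a statement like
    #
    #   MERGE INTO t USING s ON t.id = s.id
    #   WHEN MATCHED THEN UPDATE SET t.v = s.v
    #   WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
    #
    # is expected to yield exp.Merge with one exp.When node per branch.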
"TO")) 6679 6680 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6681 self._retreat(index) 6682 return None 6683 6684 right = self._parse_statement() or self._parse_id_var() 6685 if isinstance(right, (exp.Column, exp.Identifier)): 6686 right = exp.var(right.name) 6687 6688 this = self.expression(exp.EQ, this=left, expression=right) 6689 return self.expression(exp.SetItem, this=this, kind=kind) 6690 6691 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6692 self._match_text_seq("TRANSACTION") 6693 characteristics = self._parse_csv( 6694 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6695 ) 6696 return self.expression( 6697 exp.SetItem, 6698 expressions=characteristics, 6699 kind="TRANSACTION", 6700 **{"global": global_}, # type: ignore 6701 ) 6702 6703 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6704 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6705 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6706 6707 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6708 index = self._index 6709 set_ = self.expression( 6710 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6711 ) 6712 6713 if self._curr: 6714 self._retreat(index) 6715 return self._parse_as_command(self._prev) 6716 6717 return set_ 6718 6719 def _parse_var_from_options( 6720 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6721 ) -> t.Optional[exp.Var]: 6722 start = self._curr 6723 if not start: 6724 return None 6725 6726 option = start.text.upper() 6727 continuations = options.get(option) 6728 6729 index = self._index 6730 self._advance() 6731 for keywords in continuations or []: 6732 if isinstance(keywords, str): 6733 keywords = (keywords,) 6734 6735 if self._match_text_seq(*keywords): 6736 option = f"{option} {' '.join(keywords)}" 6737 break 6738 else: 6739 if continuations or continuations is None: 6740 if raise_unmatched: 6741 self.raise_error(f"Unknown option {option}") 6742 6743 self._retreat(index) 6744 return None 6745 6746 return exp.var(option) 6747 6748 def _parse_as_command(self, start: Token) -> exp.Command: 6749 while self._curr: 6750 self._advance() 6751 text = self._find_sql(start, self._prev) 6752 size = len(start.text) 6753 self._warn_unsupported() 6754 return exp.Command(this=text[:size], expression=text[size:]) 6755 6756 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6757 settings = [] 6758 6759 self._match_l_paren() 6760 kind = self._parse_id_var() 6761 6762 if self._match(TokenType.L_PAREN): 6763 while True: 6764 key = self._parse_id_var() 6765 value = self._parse_primary() 6766 6767 if not key and value is None: 6768 break 6769 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6770 self._match(TokenType.R_PAREN) 6771 6772 self._match_r_paren() 6773 6774 return self.expression( 6775 exp.DictProperty, 6776 this=this, 6777 kind=kind.this if kind else None, 6778 settings=settings, 6779 ) 6780 6781 def _parse_dict_range(self, this: str) -> exp.DictRange: 6782 self._match_l_paren() 6783 has_min = self._match_text_seq("MIN") 6784 if has_min: 6785 min = self._parse_var() or self._parse_primary() 6786 self._match_text_seq("MAX") 6787 max = self._parse_var() or self._parse_primary() 6788 else: 6789 max = self._parse_var() or self._parse_primary() 6790 min = exp.Literal.number(0) 6791 self._match_r_paren() 6792 return self.expression(exp.DictRange, this=this, 
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)

        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None

        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()

            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()

            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)

            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()

            return True

        return None
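    # Illustrative sketch (not in the original source): the _match* helpers above
    # share one contract: they return True (advancing the cursor) on a hit and a
    # falsy value otherwise, leaving the cursor in place on failure. For instance,
    # _match_text_seq("NO", "ACTION") either consumes both tokens or retreats to
    # where it started, which is what makes speculative parsing with _retreat safe.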
6922 6923 def _match_text_seq(self, *texts, advance=True): 6924 index = self._index 6925 for text in texts: 6926 if self._curr and self._curr.text.upper() == text: 6927 self._advance() 6928 else: 6929 self._retreat(index) 6930 return None 6931 6932 if not advance: 6933 self._retreat(index) 6934 6935 return True 6936 6937 def _replace_lambda( 6938 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6939 ) -> t.Optional[exp.Expression]: 6940 if not node: 6941 return node 6942 6943 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6944 6945 for column in node.find_all(exp.Column): 6946 typ = lambda_types.get(column.parts[0].name) 6947 if typ is not None: 6948 dot_or_id = column.to_dot() if column.table else column.this 6949 6950 if typ: 6951 dot_or_id = self.expression( 6952 exp.Cast, 6953 this=dot_or_id, 6954 to=typ, 6955 ) 6956 6957 parent = column.parent 6958 6959 while isinstance(parent, exp.Dot): 6960 if not isinstance(parent.parent, exp.Dot): 6961 parent.replace(dot_or_id) 6962 break 6963 parent = parent.parent 6964 else: 6965 if column is node: 6966 node = dot_or_id 6967 else: 6968 column.replace(dot_or_id) 6969 return node 6970 6971 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6972 start = self._prev 6973 6974 # Not to be confused with TRUNCATE(number, decimals) function call 6975 if self._match(TokenType.L_PAREN): 6976 self._retreat(self._index - 2) 6977 return self._parse_function() 6978 6979 # Clickhouse supports TRUNCATE DATABASE as well 6980 is_database = self._match(TokenType.DATABASE) 6981 6982 self._match(TokenType.TABLE) 6983 6984 exists = self._parse_exists(not_=False) 6985 6986 expressions = self._parse_csv( 6987 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6988 ) 6989 6990 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6991 6992 if self._match_text_seq("RESTART", "IDENTITY"): 6993 identity = "RESTART" 6994 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6995 identity = "CONTINUE" 6996 else: 6997 identity = None 6998 6999 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7000 option = self._prev.text 7001 else: 7002 option = None 7003 7004 partition = self._parse_partition() 7005 7006 # Fallback case 7007 if self._curr: 7008 return self._parse_as_command(start) 7009 7010 return self.expression( 7011 exp.TruncateTable, 7012 expressions=expressions, 7013 is_database=is_database, 7014 exists=exists, 7015 cluster=cluster, 7016 identity=identity, 7017 option=option, 7018 partition=partition, 7019 ) 7020 7021 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7022 this = self._parse_ordered(self._parse_opclass) 7023 7024 if not self._match(TokenType.WITH): 7025 return this 7026 7027 op = self._parse_var(any_token=True) 7028 7029 return self.expression(exp.WithOperator, this=this, op=op) 7030 7031 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7032 self._match(TokenType.EQ) 7033 self._match(TokenType.L_PAREN) 7034 7035 opts: t.List[t.Optional[exp.Expression]] = [] 7036 while self._curr and not self._match(TokenType.R_PAREN): 7037 if self._match_text_seq("FORMAT_NAME", "="): 7038 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7039 # so we parse it separately to use _parse_field() 7040 prop = self.expression( 7041 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7042 ) 7043 opts.append(prop) 7044 else: 7045 opts.append(self._parse_property()) 7046 
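            # Option pairs may or may not be comma-separated, e.g. Snowflake's
            # FILE_FORMAT = (TYPE = CSV FIELD_DELIMITER = '|') uses whitespace only,
            # so a trailing comma is consumed opportunistically below.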
7047 self._match(TokenType.COMMA) 7048 7049 return opts 7050 7051 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7052 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7053 7054 options = [] 7055 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7056 option = self._parse_var(any_token=True) 7057 prev = self._prev.text.upper() 7058 7059 # Different dialects might separate options and values by white space, "=" and "AS" 7060 self._match(TokenType.EQ) 7061 self._match(TokenType.ALIAS) 7062 7063 param = self.expression(exp.CopyParameter, this=option) 7064 7065 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7066 TokenType.L_PAREN, advance=False 7067 ): 7068 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7069 param.set("expressions", self._parse_wrapped_options()) 7070 elif prev == "FILE_FORMAT": 7071 # T-SQL's external file format case 7072 param.set("expression", self._parse_field()) 7073 else: 7074 param.set("expression", self._parse_unquoted_field()) 7075 7076 options.append(param) 7077 self._match(sep) 7078 7079 return options 7080 7081 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7082 expr = self.expression(exp.Credentials) 7083 7084 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7085 expr.set("storage", self._parse_field()) 7086 if self._match_text_seq("CREDENTIALS"): 7087 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7088 creds = ( 7089 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7090 ) 7091 expr.set("credentials", creds) 7092 if self._match_text_seq("ENCRYPTION"): 7093 expr.set("encryption", self._parse_wrapped_options()) 7094 if self._match_text_seq("IAM_ROLE"): 7095 expr.set("iam_role", self._parse_field()) 7096 if self._match_text_seq("REGION"): 7097 expr.set("region", self._parse_field()) 7098 7099 return expr 7100 7101 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7102 return self._parse_field() 7103 7104 def _parse_copy(self) -> exp.Copy | exp.Command: 7105 start = self._prev 7106 7107 self._match(TokenType.INTO) 7108 7109 this = ( 7110 self._parse_select(nested=True, parse_subquery_alias=False) 7111 if self._match(TokenType.L_PAREN, advance=False) 7112 else self._parse_table(schema=True) 7113 ) 7114 7115 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7116 7117 files = self._parse_csv(self._parse_file_location) 7118 credentials = self._parse_credentials() 7119 7120 self._match_text_seq("WITH") 7121 7122 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7123 7124 # Fallback case 7125 if self._curr: 7126 return self._parse_as_command(start) 7127 7128 return self.expression( 7129 exp.Copy, 7130 this=this, 7131 kind=kind, 7132 credentials=credentials, 7133 files=files, 7134 params=params, 7135 )
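In practice the Parser is usually driven through a Dialect, which pairs it with the matching Tokenizer. A minimal usage sketch, assuming only the public entry points (sqlglot.parse_one, Dialect.get_or_raise, and the Dialect.tokenizer / Dialect.parser helpers):

import sqlglot
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

# High-level entry point: tokenize and parse in one call.
ast = sqlglot.parse_one("SELECT a FROM t WHERE b > 1")
assert isinstance(ast, exp.Select)

# The same flow spelled out: a Dialect owns both a Tokenizer and a Parser class.
dialect = Dialect.get_or_raise("duckdb")
tokens = dialect.tokenizer.tokenize("SELECT a FROM t")
expressions = dialect.parser().parse(tokens, "SELECT a FROM t")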
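The parse_into method shown below also backs the into= argument of the top-level API, and error handling is controlled by the error_level constructor argument. A short sketch under those assumptions:

import sqlglot
from sqlglot import exp
from sqlglot.errors import ErrorLevel

# parse_into lets callers force an input to parse as a specific Expression type.
table = sqlglot.parse_one("db.t", into=exp.Table)
assert table.name == "t" and table.db == "db"

# error_level is forwarded to the Parser; IGNORE skips expression validation.
sqlglot.parse("SELECT 1", error_level=ErrorLevel.IGNORE)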
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
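        # NULLABLE and LOWCARDINALITY are parameterized ClickHouse wrapper types,
        # e.g. Nullable(Int64), which is why they are treated as nested here.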
*STRUCT_TYPE_TOKENS, 249 } 250 251 ENUM_TYPE_TOKENS = { 252 TokenType.ENUM, 253 TokenType.ENUM8, 254 TokenType.ENUM16, 255 } 256 257 AGGREGATE_TYPE_TOKENS = { 258 TokenType.AGGREGATEFUNCTION, 259 TokenType.SIMPLEAGGREGATEFUNCTION, 260 } 261 262 TYPE_TOKENS = { 263 TokenType.BIT, 264 TokenType.BOOLEAN, 265 TokenType.TINYINT, 266 TokenType.UTINYINT, 267 TokenType.SMALLINT, 268 TokenType.USMALLINT, 269 TokenType.INT, 270 TokenType.UINT, 271 TokenType.BIGINT, 272 TokenType.UBIGINT, 273 TokenType.INT128, 274 TokenType.UINT128, 275 TokenType.INT256, 276 TokenType.UINT256, 277 TokenType.MEDIUMINT, 278 TokenType.UMEDIUMINT, 279 TokenType.FIXEDSTRING, 280 TokenType.FLOAT, 281 TokenType.DOUBLE, 282 TokenType.CHAR, 283 TokenType.NCHAR, 284 TokenType.VARCHAR, 285 TokenType.NVARCHAR, 286 TokenType.BPCHAR, 287 TokenType.TEXT, 288 TokenType.MEDIUMTEXT, 289 TokenType.LONGTEXT, 290 TokenType.MEDIUMBLOB, 291 TokenType.LONGBLOB, 292 TokenType.BINARY, 293 TokenType.VARBINARY, 294 TokenType.JSON, 295 TokenType.JSONB, 296 TokenType.INTERVAL, 297 TokenType.TINYBLOB, 298 TokenType.TINYTEXT, 299 TokenType.TIME, 300 TokenType.TIMETZ, 301 TokenType.TIMESTAMP, 302 TokenType.TIMESTAMP_S, 303 TokenType.TIMESTAMP_MS, 304 TokenType.TIMESTAMP_NS, 305 TokenType.TIMESTAMPTZ, 306 TokenType.TIMESTAMPLTZ, 307 TokenType.TIMESTAMPNTZ, 308 TokenType.DATETIME, 309 TokenType.DATETIME64, 310 TokenType.DATE, 311 TokenType.DATE32, 312 TokenType.INT4RANGE, 313 TokenType.INT4MULTIRANGE, 314 TokenType.INT8RANGE, 315 TokenType.INT8MULTIRANGE, 316 TokenType.NUMRANGE, 317 TokenType.NUMMULTIRANGE, 318 TokenType.TSRANGE, 319 TokenType.TSMULTIRANGE, 320 TokenType.TSTZRANGE, 321 TokenType.TSTZMULTIRANGE, 322 TokenType.DATERANGE, 323 TokenType.DATEMULTIRANGE, 324 TokenType.DECIMAL, 325 TokenType.UDECIMAL, 326 TokenType.BIGDECIMAL, 327 TokenType.UUID, 328 TokenType.GEOGRAPHY, 329 TokenType.GEOMETRY, 330 TokenType.HLLSKETCH, 331 TokenType.HSTORE, 332 TokenType.PSEUDO_TYPE, 333 TokenType.SUPER, 334 TokenType.SERIAL, 335 TokenType.SMALLSERIAL, 336 TokenType.BIGSERIAL, 337 TokenType.XML, 338 TokenType.YEAR, 339 TokenType.UNIQUEIDENTIFIER, 340 TokenType.USERDEFINED, 341 TokenType.MONEY, 342 TokenType.SMALLMONEY, 343 TokenType.ROWVERSION, 344 TokenType.IMAGE, 345 TokenType.VARIANT, 346 TokenType.VECTOR, 347 TokenType.OBJECT, 348 TokenType.OBJECT_IDENTIFIER, 349 TokenType.INET, 350 TokenType.IPADDRESS, 351 TokenType.IPPREFIX, 352 TokenType.IPV4, 353 TokenType.IPV6, 354 TokenType.UNKNOWN, 355 TokenType.NULL, 356 TokenType.NAME, 357 TokenType.TDIGEST, 358 *ENUM_TYPE_TOKENS, 359 *NESTED_TYPE_TOKENS, 360 *AGGREGATE_TYPE_TOKENS, 361 } 362 363 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 364 TokenType.BIGINT: TokenType.UBIGINT, 365 TokenType.INT: TokenType.UINT, 366 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 367 TokenType.SMALLINT: TokenType.USMALLINT, 368 TokenType.TINYINT: TokenType.UTINYINT, 369 TokenType.DECIMAL: TokenType.UDECIMAL, 370 } 371 372 SUBQUERY_PREDICATES = { 373 TokenType.ANY: exp.Any, 374 TokenType.ALL: exp.All, 375 TokenType.EXISTS: exp.Exists, 376 TokenType.SOME: exp.Any, 377 } 378 379 RESERVED_TOKENS = { 380 *Tokenizer.SINGLE_TOKENS.values(), 381 TokenType.SELECT, 382 } - {TokenType.IDENTIFIER} 383 384 DB_CREATABLES = { 385 TokenType.DATABASE, 386 TokenType.DICTIONARY, 387 TokenType.MODEL, 388 TokenType.SCHEMA, 389 TokenType.SEQUENCE, 390 TokenType.STORAGE_INTEGRATION, 391 TokenType.TABLE, 392 TokenType.TAG, 393 TokenType.VIEW, 394 TokenType.WAREHOUSE, 395 TokenType.STREAMLIT, 396 } 397 398 CREATABLES = { 399 TokenType.COLUMN, 400 
TokenType.CONSTRAINT, 401 TokenType.FOREIGN_KEY, 402 TokenType.FUNCTION, 403 TokenType.INDEX, 404 TokenType.PROCEDURE, 405 *DB_CREATABLES, 406 } 407 408 ALTERABLES = { 409 TokenType.TABLE, 410 TokenType.VIEW, 411 } 412 413 # Tokens that can represent identifiers 414 ID_VAR_TOKENS = { 415 TokenType.ALL, 416 TokenType.VAR, 417 TokenType.ANTI, 418 TokenType.APPLY, 419 TokenType.ASC, 420 TokenType.ASOF, 421 TokenType.AUTO_INCREMENT, 422 TokenType.BEGIN, 423 TokenType.BPCHAR, 424 TokenType.CACHE, 425 TokenType.CASE, 426 TokenType.COLLATE, 427 TokenType.COMMAND, 428 TokenType.COMMENT, 429 TokenType.COMMIT, 430 TokenType.CONSTRAINT, 431 TokenType.COPY, 432 TokenType.CUBE, 433 TokenType.DEFAULT, 434 TokenType.DELETE, 435 TokenType.DESC, 436 TokenType.DESCRIBE, 437 TokenType.DICTIONARY, 438 TokenType.DIV, 439 TokenType.END, 440 TokenType.EXECUTE, 441 TokenType.ESCAPE, 442 TokenType.FALSE, 443 TokenType.FIRST, 444 TokenType.FILTER, 445 TokenType.FINAL, 446 TokenType.FORMAT, 447 TokenType.FULL, 448 TokenType.IDENTIFIER, 449 TokenType.IS, 450 TokenType.ISNULL, 451 TokenType.INTERVAL, 452 TokenType.KEEP, 453 TokenType.KILL, 454 TokenType.LEFT, 455 TokenType.LOAD, 456 TokenType.MERGE, 457 TokenType.NATURAL, 458 TokenType.NEXT, 459 TokenType.OFFSET, 460 TokenType.OPERATOR, 461 TokenType.ORDINALITY, 462 TokenType.OVERLAPS, 463 TokenType.OVERWRITE, 464 TokenType.PARTITION, 465 TokenType.PERCENT, 466 TokenType.PIVOT, 467 TokenType.PRAGMA, 468 TokenType.RANGE, 469 TokenType.RECURSIVE, 470 TokenType.REFERENCES, 471 TokenType.REFRESH, 472 TokenType.RENAME, 473 TokenType.REPLACE, 474 TokenType.RIGHT, 475 TokenType.ROLLUP, 476 TokenType.ROW, 477 TokenType.ROWS, 478 TokenType.SEMI, 479 TokenType.SET, 480 TokenType.SETTINGS, 481 TokenType.SHOW, 482 TokenType.TEMPORARY, 483 TokenType.TOP, 484 TokenType.TRUE, 485 TokenType.TRUNCATE, 486 TokenType.UNIQUE, 487 TokenType.UNNEST, 488 TokenType.UNPIVOT, 489 TokenType.UPDATE, 490 TokenType.USE, 491 TokenType.VOLATILE, 492 TokenType.WINDOW, 493 *CREATABLES, 494 *SUBQUERY_PREDICATES, 495 *TYPE_TOKENS, 496 *NO_PAREN_FUNCTIONS, 497 } 498 499 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 500 501 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 502 TokenType.ANTI, 503 TokenType.APPLY, 504 TokenType.ASOF, 505 TokenType.FULL, 506 TokenType.LEFT, 507 TokenType.LOCK, 508 TokenType.NATURAL, 509 TokenType.OFFSET, 510 TokenType.RIGHT, 511 TokenType.SEMI, 512 TokenType.WINDOW, 513 } 514 515 ALIAS_TOKENS = ID_VAR_TOKENS 516 517 ARRAY_CONSTRUCTORS = { 518 "ARRAY": exp.Array, 519 "LIST": exp.List, 520 } 521 522 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 523 524 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 525 526 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 527 528 FUNC_TOKENS = { 529 TokenType.COLLATE, 530 TokenType.COMMAND, 531 TokenType.CURRENT_DATE, 532 TokenType.CURRENT_DATETIME, 533 TokenType.CURRENT_TIMESTAMP, 534 TokenType.CURRENT_TIME, 535 TokenType.CURRENT_USER, 536 TokenType.FILTER, 537 TokenType.FIRST, 538 TokenType.FORMAT, 539 TokenType.GLOB, 540 TokenType.IDENTIFIER, 541 TokenType.INDEX, 542 TokenType.ISNULL, 543 TokenType.ILIKE, 544 TokenType.INSERT, 545 TokenType.LIKE, 546 TokenType.MERGE, 547 TokenType.OFFSET, 548 TokenType.PRIMARY_KEY, 549 TokenType.RANGE, 550 TokenType.REPLACE, 551 TokenType.RLIKE, 552 TokenType.ROW, 553 TokenType.UNNEST, 554 TokenType.VAR, 555 TokenType.LEFT, 556 TokenType.RIGHT, 557 TokenType.SEQUENCE, 558 TokenType.DATE, 559 TokenType.DATETIME, 560 TokenType.TABLE, 561 TokenType.TIMESTAMP, 562 TokenType.TIMESTAMPTZ, 563 
TokenType.TRUNCATE, 564 TokenType.WINDOW, 565 TokenType.XOR, 566 *TYPE_TOKENS, 567 *SUBQUERY_PREDICATES, 568 } 569 570 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 571 TokenType.AND: exp.And, 572 } 573 574 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 575 TokenType.COLON_EQ: exp.PropertyEQ, 576 } 577 578 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 579 TokenType.OR: exp.Or, 580 } 581 582 EQUALITY = { 583 TokenType.EQ: exp.EQ, 584 TokenType.NEQ: exp.NEQ, 585 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 586 } 587 588 COMPARISON = { 589 TokenType.GT: exp.GT, 590 TokenType.GTE: exp.GTE, 591 TokenType.LT: exp.LT, 592 TokenType.LTE: exp.LTE, 593 } 594 595 BITWISE = { 596 TokenType.AMP: exp.BitwiseAnd, 597 TokenType.CARET: exp.BitwiseXor, 598 TokenType.PIPE: exp.BitwiseOr, 599 } 600 601 TERM = { 602 TokenType.DASH: exp.Sub, 603 TokenType.PLUS: exp.Add, 604 TokenType.MOD: exp.Mod, 605 TokenType.COLLATE: exp.Collate, 606 } 607 608 FACTOR = { 609 TokenType.DIV: exp.IntDiv, 610 TokenType.LR_ARROW: exp.Distance, 611 TokenType.SLASH: exp.Div, 612 TokenType.STAR: exp.Mul, 613 } 614 615 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 616 617 TIMES = { 618 TokenType.TIME, 619 TokenType.TIMETZ, 620 } 621 622 TIMESTAMPS = { 623 TokenType.TIMESTAMP, 624 TokenType.TIMESTAMPTZ, 625 TokenType.TIMESTAMPLTZ, 626 *TIMES, 627 } 628 629 SET_OPERATIONS = { 630 TokenType.UNION, 631 TokenType.INTERSECT, 632 TokenType.EXCEPT, 633 } 634 635 JOIN_METHODS = { 636 TokenType.ASOF, 637 TokenType.NATURAL, 638 TokenType.POSITIONAL, 639 } 640 641 JOIN_SIDES = { 642 TokenType.LEFT, 643 TokenType.RIGHT, 644 TokenType.FULL, 645 } 646 647 JOIN_KINDS = { 648 TokenType.ANTI, 649 TokenType.CROSS, 650 TokenType.INNER, 651 TokenType.OUTER, 652 TokenType.SEMI, 653 TokenType.STRAIGHT_JOIN, 654 } 655 656 JOIN_HINTS: t.Set[str] = set() 657 658 LAMBDAS = { 659 TokenType.ARROW: lambda self, expressions: self.expression( 660 exp.Lambda, 661 this=self._replace_lambda( 662 self._parse_assignment(), 663 expressions, 664 ), 665 expressions=expressions, 666 ), 667 TokenType.FARROW: lambda self, expressions: self.expression( 668 exp.Kwarg, 669 this=exp.var(expressions[0].name), 670 expression=self._parse_assignment(), 671 ), 672 } 673 674 COLUMN_OPERATORS = { 675 TokenType.DOT: None, 676 TokenType.DCOLON: lambda self, this, to: self.expression( 677 exp.Cast if self.STRICT_CAST else exp.TryCast, 678 this=this, 679 to=to, 680 ), 681 TokenType.ARROW: lambda self, this, path: self.expression( 682 exp.JSONExtract, 683 this=this, 684 expression=self.dialect.to_json_path(path), 685 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 686 ), 687 TokenType.DARROW: lambda self, this, path: self.expression( 688 exp.JSONExtractScalar, 689 this=this, 690 expression=self.dialect.to_json_path(path), 691 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 692 ), 693 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 694 exp.JSONBExtract, 695 this=this, 696 expression=path, 697 ), 698 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 699 exp.JSONBExtractScalar, 700 this=this, 701 expression=path, 702 ), 703 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 704 exp.JSONBContains, 705 this=this, 706 expression=key, 707 ), 708 } 709 710 EXPRESSION_PARSERS = { 711 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 712 exp.Column: lambda self: self._parse_column(), 713 exp.Condition: lambda self: self._parse_assignment(), 714 exp.DataType: lambda self: 
self._parse_types(allow_identifiers=False, schema=True), 715 exp.Expression: lambda self: self._parse_expression(), 716 exp.From: lambda self: self._parse_from(joins=True), 717 exp.Group: lambda self: self._parse_group(), 718 exp.Having: lambda self: self._parse_having(), 719 exp.Identifier: lambda self: self._parse_id_var(), 720 exp.Join: lambda self: self._parse_join(), 721 exp.Lambda: lambda self: self._parse_lambda(), 722 exp.Lateral: lambda self: self._parse_lateral(), 723 exp.Limit: lambda self: self._parse_limit(), 724 exp.Offset: lambda self: self._parse_offset(), 725 exp.Order: lambda self: self._parse_order(), 726 exp.Ordered: lambda self: self._parse_ordered(), 727 exp.Properties: lambda self: self._parse_properties(), 728 exp.Qualify: lambda self: self._parse_qualify(), 729 exp.Returning: lambda self: self._parse_returning(), 730 exp.Select: lambda self: self._parse_select(), 731 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 732 exp.Table: lambda self: self._parse_table_parts(), 733 exp.TableAlias: lambda self: self._parse_table_alias(), 734 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 735 exp.Where: lambda self: self._parse_where(), 736 exp.Window: lambda self: self._parse_named_window(), 737 exp.With: lambda self: self._parse_with(), 738 "JOIN_TYPE": lambda self: self._parse_join_parts(), 739 } 740 741 STATEMENT_PARSERS = { 742 TokenType.ALTER: lambda self: self._parse_alter(), 743 TokenType.BEGIN: lambda self: self._parse_transaction(), 744 TokenType.CACHE: lambda self: self._parse_cache(), 745 TokenType.COMMENT: lambda self: self._parse_comment(), 746 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 747 TokenType.COPY: lambda self: self._parse_copy(), 748 TokenType.CREATE: lambda self: self._parse_create(), 749 TokenType.DELETE: lambda self: self._parse_delete(), 750 TokenType.DESC: lambda self: self._parse_describe(), 751 TokenType.DESCRIBE: lambda self: self._parse_describe(), 752 TokenType.DROP: lambda self: self._parse_drop(), 753 TokenType.INSERT: lambda self: self._parse_insert(), 754 TokenType.KILL: lambda self: self._parse_kill(), 755 TokenType.LOAD: lambda self: self._parse_load(), 756 TokenType.MERGE: lambda self: self._parse_merge(), 757 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 758 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 759 TokenType.REFRESH: lambda self: self._parse_refresh(), 760 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 761 TokenType.SET: lambda self: self._parse_set(), 762 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 763 TokenType.UNCACHE: lambda self: self._parse_uncache(), 764 TokenType.UPDATE: lambda self: self._parse_update(), 765 TokenType.USE: lambda self: self.expression( 766 exp.Use, 767 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 768 this=self._parse_table(schema=False), 769 ), 770 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 771 } 772 773 UNARY_PARSERS = { 774 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 775 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 776 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 777 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 778 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 779 TokenType.DPIPE_SLASH: lambda self: 
self.expression(exp.Cbrt, this=self._parse_unary()), 780 } 781 782 STRING_PARSERS = { 783 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 784 exp.RawString, this=token.text 785 ), 786 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 787 exp.National, this=token.text 788 ), 789 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 790 TokenType.STRING: lambda self, token: self.expression( 791 exp.Literal, this=token.text, is_string=True 792 ), 793 TokenType.UNICODE_STRING: lambda self, token: self.expression( 794 exp.UnicodeString, 795 this=token.text, 796 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 797 ), 798 } 799 800 NUMERIC_PARSERS = { 801 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 802 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 803 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 804 TokenType.NUMBER: lambda self, token: self.expression( 805 exp.Literal, this=token.text, is_string=False 806 ), 807 } 808 809 PRIMARY_PARSERS = { 810 **STRING_PARSERS, 811 **NUMERIC_PARSERS, 812 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 813 TokenType.NULL: lambda self, _: self.expression(exp.Null), 814 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 815 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 816 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 817 TokenType.STAR: lambda self, _: self.expression( 818 exp.Star, 819 **{ 820 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 821 "replace": self._parse_star_op("REPLACE"), 822 "rename": self._parse_star_op("RENAME"), 823 }, 824 ), 825 } 826 827 PLACEHOLDER_PARSERS = { 828 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 829 TokenType.PARAMETER: lambda self: self._parse_parameter(), 830 TokenType.COLON: lambda self: ( 831 self.expression(exp.Placeholder, this=self._prev.text) 832 if self._match_set(self.ID_VAR_TOKENS) 833 else None 834 ), 835 } 836 837 RANGE_PARSERS = { 838 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 839 TokenType.GLOB: binary_range_parser(exp.Glob), 840 TokenType.ILIKE: binary_range_parser(exp.ILike), 841 TokenType.IN: lambda self, this: self._parse_in(this), 842 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 843 TokenType.IS: lambda self, this: self._parse_is(this), 844 TokenType.LIKE: binary_range_parser(exp.Like), 845 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 846 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 847 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 848 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 849 } 850 851 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 852 "ALLOWED_VALUES": lambda self: self.expression( 853 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 854 ), 855 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 856 "AUTO": lambda self: self._parse_auto_property(), 857 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 858 "BACKUP": lambda self: self.expression( 859 exp.BackupProperty, this=self._parse_var(any_token=True) 860 ), 861 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 862 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 863 "CHARACTER 
SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 864 "CHECKSUM": lambda self: self._parse_checksum(), 865 "CLUSTER BY": lambda self: self._parse_cluster(), 866 "CLUSTERED": lambda self: self._parse_clustered_by(), 867 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 868 exp.CollateProperty, **kwargs 869 ), 870 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 871 "CONTAINS": lambda self: self._parse_contains_property(), 872 "COPY": lambda self: self._parse_copy_property(), 873 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 874 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 875 "DEFINER": lambda self: self._parse_definer(), 876 "DETERMINISTIC": lambda self: self.expression( 877 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 878 ), 879 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 880 "DISTKEY": lambda self: self._parse_distkey(), 881 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 882 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 883 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 884 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 885 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 886 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 887 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 888 "FREESPACE": lambda self: self._parse_freespace(), 889 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 890 "HEAP": lambda self: self.expression(exp.HeapProperty), 891 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 892 "IMMUTABLE": lambda self: self.expression( 893 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 894 ), 895 "INHERITS": lambda self: self.expression( 896 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 897 ), 898 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 899 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 900 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 901 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 902 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 903 "LIKE": lambda self: self._parse_create_like(), 904 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 905 "LOCK": lambda self: self._parse_locking(), 906 "LOCKING": lambda self: self._parse_locking(), 907 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 908 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 909 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 910 "MODIFIES": lambda self: self._parse_modifies_property(), 911 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 912 "NO": lambda self: self._parse_no_property(), 913 "ON": lambda self: self._parse_on_property(), 914 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 915 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 916 "PARTITION": lambda self: self._parse_partitioned_of(), 917 "PARTITION BY": lambda self: self._parse_partitioned_by(), 918 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 919 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 920 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 921 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 922 "READS": lambda self: self._parse_reads_property(), 923 "REMOTE": lambda self: self._parse_remote_with_connection(), 924 "RETURNS": lambda self: self._parse_returns(), 925 "STRICT": lambda self: self.expression(exp.StrictProperty), 926 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 927 "ROW": lambda self: self._parse_row(), 928 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 929 "SAMPLE": lambda self: self.expression( 930 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 931 ), 932 "SECURE": lambda self: self.expression(exp.SecureProperty), 933 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 934 "SETTINGS": lambda self: self._parse_settings_property(), 935 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 936 "SORTKEY": lambda self: self._parse_sortkey(), 937 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 938 "STABLE": lambda self: self.expression( 939 exp.StabilityProperty, this=exp.Literal.string("STABLE") 940 ), 941 "STORED": lambda self: self._parse_stored(), 942 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 943 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 944 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 945 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 946 "TO": lambda self: self._parse_to_table(), 947 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 948 "TRANSFORM": lambda self: self.expression( 949 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 950 ), 951 "TTL": lambda self: self._parse_ttl(), 952 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 953 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 954 "VOLATILE": lambda self: self._parse_volatile_property(), 955 "WITH": lambda self: self._parse_with_property(), 956 } 957 958 CONSTRAINT_PARSERS = { 959 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 960 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 961 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 962 "CHARACTER SET": lambda self: self.expression( 963 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 964 ), 965 "CHECK": lambda self: self.expression( 966 exp.CheckColumnConstraint, 967 this=self._parse_wrapped(self._parse_assignment), 968 enforced=self._match_text_seq("ENFORCED"), 969 ), 970 "COLLATE": lambda self: self.expression( 971 exp.CollateColumnConstraint, 972 this=self._parse_identifier() or self._parse_column(), 973 ), 974 "COMMENT": lambda self: self.expression( 975 exp.CommentColumnConstraint, this=self._parse_string() 976 ), 977 "COMPRESS": lambda self: self._parse_compress(), 978 "CLUSTERED": lambda self: self.expression( 979 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 980 ), 981 "NONCLUSTERED": lambda self: self.expression( 982 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 983 ), 984 "DEFAULT": lambda self: self.expression( 985 exp.DefaultColumnConstraint, this=self._parse_bitwise() 986 ), 987 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 988 "EPHEMERAL": lambda self: self.expression( 989 exp.EphemeralColumnConstraint, 
this=self._parse_bitwise() 990 ), 991 "EXCLUDE": lambda self: self.expression( 992 exp.ExcludeColumnConstraint, this=self._parse_index_params() 993 ), 994 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 995 "FORMAT": lambda self: self.expression( 996 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 997 ), 998 "GENERATED": lambda self: self._parse_generated_as_identity(), 999 "IDENTITY": lambda self: self._parse_auto_increment(), 1000 "INLINE": lambda self: self._parse_inline(), 1001 "LIKE": lambda self: self._parse_create_like(), 1002 "NOT": lambda self: self._parse_not_constraint(), 1003 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1004 "ON": lambda self: ( 1005 self._match(TokenType.UPDATE) 1006 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1007 ) 1008 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1009 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1010 "PERIOD": lambda self: self._parse_period_for_system_time(), 1011 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1012 "REFERENCES": lambda self: self._parse_references(match=False), 1013 "TITLE": lambda self: self.expression( 1014 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1015 ), 1016 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1017 "UNIQUE": lambda self: self._parse_unique(), 1018 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1019 "WITH": lambda self: self.expression( 1020 exp.Properties, expressions=self._parse_wrapped_properties() 1021 ), 1022 } 1023 1024 ALTER_PARSERS = { 1025 "ADD": lambda self: self._parse_alter_table_add(), 1026 "ALTER": lambda self: self._parse_alter_table_alter(), 1027 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1028 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1029 "DROP": lambda self: self._parse_alter_table_drop(), 1030 "RENAME": lambda self: self._parse_alter_table_rename(), 1031 "SET": lambda self: self._parse_alter_table_set(), 1032 "AS": lambda self: self._parse_select(), 1033 } 1034 1035 ALTER_ALTER_PARSERS = { 1036 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1037 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1038 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1039 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1040 } 1041 1042 SCHEMA_UNNAMED_CONSTRAINTS = { 1043 "CHECK", 1044 "EXCLUDE", 1045 "FOREIGN KEY", 1046 "LIKE", 1047 "PERIOD", 1048 "PRIMARY KEY", 1049 "UNIQUE", 1050 } 1051 1052 NO_PAREN_FUNCTION_PARSERS = { 1053 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1054 "CASE": lambda self: self._parse_case(), 1055 "CONNECT_BY_ROOT": lambda self: self.expression( 1056 exp.ConnectByRoot, this=self._parse_column() 1057 ), 1058 "IF": lambda self: self._parse_if(), 1059 "NEXT": lambda self: self._parse_next_value_for(), 1060 } 1061 1062 INVALID_FUNC_NAME_TOKENS = { 1063 TokenType.IDENTIFIER, 1064 TokenType.STRING, 1065 } 1066 1067 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1068 1069 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1070 1071 FUNCTION_PARSERS = { 1072 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1073 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1074 "DECODE": lambda self: self._parse_decode(), 1075 "EXTRACT": lambda self: self._parse_extract(), 1076 "GAP_FILL": 
lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL",
"SERIALIZABLE"), 1147 ), 1148 "READ": ("WRITE", "ONLY"), 1149 } 1150 1151 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1152 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1153 ) 1154 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1155 1156 CREATE_SEQUENCE: OPTIONS_TYPE = { 1157 "SCALE": ("EXTEND", "NOEXTEND"), 1158 "SHARD": ("EXTEND", "NOEXTEND"), 1159 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1160 **dict.fromkeys( 1161 ( 1162 "SESSION", 1163 "GLOBAL", 1164 "KEEP", 1165 "NOKEEP", 1166 "ORDER", 1167 "NOORDER", 1168 "NOCACHE", 1169 "CYCLE", 1170 "NOCYCLE", 1171 "NOMINVALUE", 1172 "NOMAXVALUE", 1173 "NOSCALE", 1174 "NOSHARD", 1175 ), 1176 tuple(), 1177 ), 1178 } 1179 1180 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1181 1182 USABLES: OPTIONS_TYPE = dict.fromkeys( 1183 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1184 ) 1185 1186 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1187 1188 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1189 "TYPE": ("EVOLUTION",), 1190 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1191 } 1192 1193 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1194 "NOT": ("ENFORCED",), 1195 "MATCH": ( 1196 "FULL", 1197 "PARTIAL", 1198 "SIMPLE", 1199 ), 1200 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1201 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1202 } 1203 1204 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1205 1206 CLONE_KEYWORDS = {"CLONE", "COPY"} 1207 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1208 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1209 1210 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1211 1212 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1213 1214 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1215 1216 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1217 1218 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1219 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1220 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1221 1222 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1223 1224 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1225 1226 ADD_CONSTRAINT_TOKENS = { 1227 TokenType.CONSTRAINT, 1228 TokenType.FOREIGN_KEY, 1229 TokenType.INDEX, 1230 TokenType.KEY, 1231 TokenType.PRIMARY_KEY, 1232 TokenType.UNIQUE, 1233 } 1234 1235 DISTINCT_TOKENS = {TokenType.DISTINCT} 1236 1237 NULL_TOKENS = {TokenType.NULL} 1238 1239 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1240 1241 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1242 1243 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1244 1245 STRICT_CAST = True 1246 1247 PREFIXED_PIVOT_COLUMNS = False 1248 IDENTIFY_PIVOT_STRINGS = False 1249 1250 LOG_DEFAULTS_TO_LN = False 1251 1252 # Whether ADD is present for each column added by ALTER TABLE 1253 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1254 1255 # Whether the table sample clause expects CSV syntax 1256 TABLESAMPLE_CSV = False 1257 1258 # The default method used for table sampling 1259 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1260 1261 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1262 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1263 1264 # Whether the TRIM function expects the characters to trim as its first argument 1265 TRIM_PATTERN_FIRST = False 1266 1267 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1268 STRING_ALIASES = False 1269 1270 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1271 MODIFIERS_ATTACHED_TO_SET_OP = True 1272 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1273 1274 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1275 NO_PAREN_IF_COMMANDS = True 1276 1277 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1278 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1279 1280 # Whether the `:` operator is used to extract a value from a VARIANT column 1281 COLON_IS_VARIANT_EXTRACT = False 1282 1283 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1284 # If this is True and '(' is not found, the keyword will be treated as an identifier 1285 VALUES_FOLLOWED_BY_PAREN = True 1286 1287 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1288 SUPPORTS_IMPLICIT_UNNEST = False 1289 1290 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1291 INTERVAL_SPANS = True 1292 1293 # Whether a PARTITION clause can follow a table reference 1294 SUPPORTS_PARTITION_SELECTION = False 1295 1296 __slots__ = ( 1297 "error_level", 1298 "error_message_context", 1299 "max_errors", 1300 "dialect", 1301 "sql", 1302 "errors", 1303 "_tokens", 1304 "_index", 1305 "_curr", 1306 "_next", 1307 "_prev", 1308 "_prev_comments", 1309 ) 1310 1311 # Autofilled 1312 SHOW_TRIE: t.Dict = {} 1313 SET_TRIE: t.Dict = {} 1314 1315 def __init__( 1316 self, 1317 error_level: t.Optional[ErrorLevel] = None, 1318 error_message_context: int = 100, 1319 max_errors: int = 3, 1320 dialect: DialectType = None, 1321 ): 1322 from sqlglot.dialects import Dialect 1323 1324 self.error_level = error_level or ErrorLevel.IMMEDIATE 1325 self.error_message_context = error_message_context 1326 self.max_errors = max_errors 1327 self.dialect = Dialect.get_or_raise(dialect) 1328 self.reset() 1329 1330 def reset(self): 1331 self.sql = "" 1332 self.errors = [] 1333 self._tokens = [] 1334 self._index = 0 1335 self._curr = None 1336 self._next = None 1337 self._prev = None 1338 self._prev_comments = None 1339 1340 def parse( 1341 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1342 ) -> t.List[t.Optional[exp.Expression]]: 1343 """ 1344 Parses a list of tokens and returns a list of syntax trees, one tree 1345 per parsed SQL statement. 1346 1347 Args: 1348 raw_tokens: The list of tokens. 1349 sql: The original SQL string, used to produce helpful debug messages. 1350 1351 Returns: 1352 The list of the produced syntax trees. 1353 """ 1354 return self._parse( 1355 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1356 ) 1357 1358 def parse_into( 1359 self, 1360 expression_types: exp.IntoType, 1361 raw_tokens: t.List[Token], 1362 sql: t.Optional[str] = None, 1363 ) -> t.List[t.Optional[exp.Expression]]: 1364 """ 1365 Parses a list of tokens into a given Expression type. If a collection of Expression 1366 types is given instead, this method will try to parse the token list into each one 1367 of them, stopping at the first for which the parsing succeeds. 1368 1369 Args: 1370 expression_types: The expression type(s) to try and parse the token list into. 
1371 raw_tokens: The list of tokens. 1372 sql: The original SQL string, used to produce helpful debug messages. 1373 1374 Returns: 1375 The target Expression. 1376 """ 1377 errors = [] 1378 for expression_type in ensure_list(expression_types): 1379 parser = self.EXPRESSION_PARSERS.get(expression_type) 1380 if not parser: 1381 raise TypeError(f"No parser registered for {expression_type}") 1382 1383 try: 1384 return self._parse(parser, raw_tokens, sql) 1385 except ParseError as e: 1386 e.errors[0]["into_expression"] = expression_type 1387 errors.append(e) 1388 1389 raise ParseError( 1390 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1391 errors=merge_errors(errors), 1392 ) from errors[-1] 1393 1394 def _parse( 1395 self, 1396 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1397 raw_tokens: t.List[Token], 1398 sql: t.Optional[str] = None, 1399 ) -> t.List[t.Optional[exp.Expression]]: 1400 self.reset() 1401 self.sql = sql or "" 1402 1403 total = len(raw_tokens) 1404 chunks: t.List[t.List[Token]] = [[]] 1405 1406 for i, token in enumerate(raw_tokens): 1407 if token.token_type == TokenType.SEMICOLON: 1408 if token.comments: 1409 chunks.append([token]) 1410 1411 if i < total - 1: 1412 chunks.append([]) 1413 else: 1414 chunks[-1].append(token) 1415 1416 expressions = [] 1417 1418 for tokens in chunks: 1419 self._index = -1 1420 self._tokens = tokens 1421 self._advance() 1422 1423 expressions.append(parse_method(self)) 1424 1425 if self._index < len(self._tokens): 1426 self.raise_error("Invalid expression / Unexpected token") 1427 1428 self.check_errors() 1429 1430 return expressions 1431 1432 def check_errors(self) -> None: 1433 """Logs or raises any found errors, depending on the chosen error level setting.""" 1434 if self.error_level == ErrorLevel.WARN: 1435 for error in self.errors: 1436 logger.error(str(error)) 1437 elif self.error_level == ErrorLevel.RAISE and self.errors: 1438 raise ParseError( 1439 concat_messages(self.errors, self.max_errors), 1440 errors=merge_errors(self.errors), 1441 ) 1442 1443 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1444 """ 1445 Appends an error in the list of recorded errors or raises it, depending on the chosen 1446 error level setting. 1447 """ 1448 token = token or self._curr or self._prev or Token.string("") 1449 start = token.start 1450 end = token.end + 1 1451 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1452 highlight = self.sql[start:end] 1453 end_context = self.sql[end : end + self.error_message_context] 1454 1455 error = ParseError.new( 1456 f"{message}. Line {token.line}, Col: {token.col}.\n" 1457 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1458 description=message, 1459 line=token.line, 1460 col=token.col, 1461 start_context=start_context, 1462 highlight=highlight, 1463 end_context=end_context, 1464 ) 1465 1466 if self.error_level == ErrorLevel.IMMEDIATE: 1467 raise error 1468 1469 self.errors.append(error) 1470 1471 def expression( 1472 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1473 ) -> E: 1474 """ 1475 Creates a new, validated Expression. 1476 1477 Args: 1478 exp_class: The expression class to instantiate. 1479 comments: An optional list of comments to attach to the expression. 1480 kwargs: The arguments to set for the expression along with their respective values. 1481 1482 Returns: 1483 The target expression. 
1484 """ 1485 instance = exp_class(**kwargs) 1486 instance.add_comments(comments) if comments else self._add_comments(instance) 1487 return self.validate_expression(instance) 1488 1489 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1490 if expression and self._prev_comments: 1491 expression.add_comments(self._prev_comments) 1492 self._prev_comments = None 1493 1494 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1495 """ 1496 Validates an Expression, making sure that all its mandatory arguments are set. 1497 1498 Args: 1499 expression: The expression to validate. 1500 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1501 1502 Returns: 1503 The validated expression. 1504 """ 1505 if self.error_level != ErrorLevel.IGNORE: 1506 for error_message in expression.error_messages(args): 1507 self.raise_error(error_message) 1508 1509 return expression 1510 1511 def _find_sql(self, start: Token, end: Token) -> str: 1512 return self.sql[start.start : end.end + 1] 1513 1514 def _is_connected(self) -> bool: 1515 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1516 1517 def _advance(self, times: int = 1) -> None: 1518 self._index += times 1519 self._curr = seq_get(self._tokens, self._index) 1520 self._next = seq_get(self._tokens, self._index + 1) 1521 1522 if self._index > 0: 1523 self._prev = self._tokens[self._index - 1] 1524 self._prev_comments = self._prev.comments 1525 else: 1526 self._prev = None 1527 self._prev_comments = None 1528 1529 def _retreat(self, index: int) -> None: 1530 if index != self._index: 1531 self._advance(index - self._index) 1532 1533 def _warn_unsupported(self) -> None: 1534 if len(self._tokens) <= 1: 1535 return 1536 1537 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1538 # interested in emitting a warning for the one being currently processed. 1539 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1540 1541 logger.warning( 1542 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1543 ) 1544 1545 def _parse_command(self) -> exp.Command: 1546 self._warn_unsupported() 1547 return self.expression( 1548 exp.Command, 1549 comments=self._prev_comments, 1550 this=self._prev.text.upper(), 1551 expression=self._parse_string(), 1552 ) 1553 1554 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1555 """ 1556 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1557 This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims to
1558 solve this by setting & resetting the parser state accordingly.
1559 """
1560 index = self._index
1561 error_level = self.error_level
1562
1563 self.error_level = ErrorLevel.IMMEDIATE
1564 try:
1565 this = parse_method()
1566 except ParseError:
1567 this = None
1568 finally:
1569 if not this or retreat:
1570 self._retreat(index)
1571 self.error_level = error_level
1572
1573 return this
1574
1575 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1576 start = self._prev
1577 exists = self._parse_exists() if allow_exists else None
1578
1579 self._match(TokenType.ON)
1580
1581 materialized = self._match_text_seq("MATERIALIZED")
1582 kind = self._match_set(self.CREATABLES) and self._prev
1583 if not kind:
1584 return self._parse_as_command(start)
1585
1586 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1587 this = self._parse_user_defined_function(kind=kind.token_type)
1588 elif kind.token_type == TokenType.TABLE:
1589 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1590 elif kind.token_type == TokenType.COLUMN:
1591 this = self._parse_column()
1592 else:
1593 this = self._parse_id_var()
1594
1595 self._match(TokenType.IS)
1596
1597 return self.expression(
1598 exp.Comment,
1599 this=this,
1600 kind=kind.text,
1601 expression=self._parse_string(),
1602 exists=exists,
1603 materialized=materialized,
1604 )
1605
1606 def _parse_to_table(
1607 self,
1608 ) -> exp.ToTableProperty:
1609 table = self._parse_table_parts(schema=True)
1610 return self.expression(exp.ToTableProperty, this=table)
1611
1612 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1613 def _parse_ttl(self) -> exp.Expression:
1614 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1615 this = self._parse_bitwise()
1616
1617 if self._match_text_seq("DELETE"):
1618 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1619 if self._match_text_seq("RECOMPRESS"):
1620 return self.expression(
1621 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1622 )
1623 if self._match_text_seq("TO", "DISK"):
1624 return self.expression(
1625 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1626 )
1627 if self._match_text_seq("TO", "VOLUME"):
1628 return self.expression(
1629 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1630 )
1631
1632 return this
1633
1634 expressions = self._parse_csv(_parse_ttl_action)
1635 where = self._parse_where()
1636 group = self._parse_group()
1637
1638 aggregates = None
1639 if group and self._match(TokenType.SET):
1640 aggregates = self._parse_csv(self._parse_set_item)
1641
1642 return self.expression(
1643 exp.MergeTreeTTL,
1644 expressions=expressions,
1645 where=where,
1646 group=group,
1647 aggregates=aggregates,
1648 )
1649
1650 def _parse_statement(self) -> t.Optional[exp.Expression]:
1651 if self._curr is None:
1652 return None
1653
1654 if self._match_set(self.STATEMENT_PARSERS):
1655 return self.STATEMENT_PARSERS[self._prev.token_type](self)
1656
1657 if self._match_set(self.dialect.tokenizer.COMMANDS):
1658 return self._parse_command()
1659
1660 expression = self._parse_expression()
1661 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1662 return self._parse_query_modifiers(expression)
1663
1664 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1665 start =
self._prev 1666 temporary = self._match(TokenType.TEMPORARY) 1667 materialized = self._match_text_seq("MATERIALIZED") 1668 1669 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1670 if not kind: 1671 return self._parse_as_command(start) 1672 1673 if_exists = exists or self._parse_exists() 1674 table = self._parse_table_parts( 1675 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1676 ) 1677 1678 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1679 1680 if self._match(TokenType.L_PAREN, advance=False): 1681 expressions = self._parse_wrapped_csv(self._parse_types) 1682 else: 1683 expressions = None 1684 1685 return self.expression( 1686 exp.Drop, 1687 comments=start.comments, 1688 exists=if_exists, 1689 this=table, 1690 expressions=expressions, 1691 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1692 temporary=temporary, 1693 materialized=materialized, 1694 cascade=self._match_text_seq("CASCADE"), 1695 constraints=self._match_text_seq("CONSTRAINTS"), 1696 purge=self._match_text_seq("PURGE"), 1697 cluster=cluster, 1698 ) 1699 1700 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1701 return ( 1702 self._match_text_seq("IF") 1703 and (not not_ or self._match(TokenType.NOT)) 1704 and self._match(TokenType.EXISTS) 1705 ) 1706 1707 def _parse_create(self) -> exp.Create | exp.Command: 1708 # Note: this can't be None because we've matched a statement parser 1709 start = self._prev 1710 comments = self._prev_comments 1711 1712 replace = ( 1713 start.token_type == TokenType.REPLACE 1714 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1715 or self._match_pair(TokenType.OR, TokenType.ALTER) 1716 ) 1717 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1718 1719 unique = self._match(TokenType.UNIQUE) 1720 1721 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1722 clustered = True 1723 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1724 "COLUMNSTORE" 1725 ): 1726 clustered = False 1727 else: 1728 clustered = None 1729 1730 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1731 self._advance() 1732 1733 properties = None 1734 create_token = self._match_set(self.CREATABLES) and self._prev 1735 1736 if not create_token: 1737 # exp.Properties.Location.POST_CREATE 1738 properties = self._parse_properties() 1739 create_token = self._match_set(self.CREATABLES) and self._prev 1740 1741 if not properties or not create_token: 1742 return self._parse_as_command(start) 1743 1744 concurrently = self._match_text_seq("CONCURRENTLY") 1745 exists = self._parse_exists(not_=True) 1746 this = None 1747 expression: t.Optional[exp.Expression] = None 1748 indexes = None 1749 no_schema_binding = None 1750 begin = None 1751 end = None 1752 clone = None 1753 1754 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1755 nonlocal properties 1756 if properties and temp_props: 1757 properties.expressions.extend(temp_props.expressions) 1758 elif temp_props: 1759 properties = temp_props 1760 1761 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1762 this = self._parse_user_defined_function(kind=create_token.token_type) 1763 1764 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1765 extend_props(self._parse_properties()) 1766 1767 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1768 extend_props(self._parse_properties()) 1769 1770 if not expression: 1771 if 
self._match(TokenType.COMMAND):
1772 expression = self._parse_as_command(self._prev)
1773 else:
1774 begin = self._match(TokenType.BEGIN)
1775 return_ = self._match_text_seq("RETURN")
1776
1777 if self._match(TokenType.STRING, advance=False):
1778 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1779 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1780 expression = self._parse_string()
1781 extend_props(self._parse_properties())
1782 else:
1783 expression = self._parse_statement()
1784
1785 end = self._match_text_seq("END")
1786
1787 if return_:
1788 expression = self.expression(exp.Return, this=expression)
1789 elif create_token.token_type == TokenType.INDEX:
1790 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
1791 if not self._match(TokenType.ON):
1792 index = self._parse_id_var()
1793 anonymous = False
1794 else:
1795 index = None
1796 anonymous = True
1797
1798 this = self._parse_index(index=index, anonymous=anonymous)
1799 elif create_token.token_type in self.DB_CREATABLES:
1800 table_parts = self._parse_table_parts(
1801 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
1802 )
1803
1804 # exp.Properties.Location.POST_NAME
1805 self._match(TokenType.COMMA)
1806 extend_props(self._parse_properties(before=True))
1807
1808 this = self._parse_schema(this=table_parts)
1809
1810 # exp.Properties.Location.POST_SCHEMA and POST_WITH
1811 extend_props(self._parse_properties())
1812
1813 self._match(TokenType.ALIAS)
1814 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1815 # exp.Properties.Location.POST_ALIAS
1816 extend_props(self._parse_properties())
1817
1818 if create_token.token_type == TokenType.SEQUENCE:
1819 expression = self._parse_types()
1820 extend_props(self._parse_properties())
1821 else:
1822 expression = self._parse_ddl_select()
1823
1824 if create_token.token_type == TokenType.TABLE:
1825 # exp.Properties.Location.POST_EXPRESSION
1826 extend_props(self._parse_properties())
1827
1828 indexes = []
1829 while True:
1830 index = self._parse_index()
1831
1832 # exp.Properties.Location.POST_INDEX
1833 extend_props(self._parse_properties())
1834 if not index:
1835 break
1836 else:
1837 self._match(TokenType.COMMA)
1838 indexes.append(index)
1839 elif create_token.token_type == TokenType.VIEW:
1840 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1841 no_schema_binding = True
1842
1843 shallow = self._match_text_seq("SHALLOW")
1844
1845 if self._match_texts(self.CLONE_KEYWORDS):
1846 copy = self._prev.text.lower() == "copy"
1847 clone = self.expression(
1848 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
1849 )
1850
1851 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
1852 return self._parse_as_command(start)
1853
1854 create_kind_text = create_token.text.upper()
1855 return self.expression(
1856 exp.Create,
1857 comments=comments,
1858 this=this,
1859 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
1860 replace=replace,
1861 refresh=refresh,
1862 unique=unique,
1863 expression=expression,
1864 exists=exists,
1865 properties=properties,
1866 indexes=indexes,
1867 no_schema_binding=no_schema_binding,
1868 begin=begin,
1869 end=end,
1870 clone=clone,
1871 concurrently=concurrently,
1872 clustered=clustered,
1873 )
1874
1875 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
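# Editorial sketch (not in the original source): the loop below consumes sequence
# options in any order until an unrecognized token is reached, so a DDL fragment like
#
#     CREATE SEQUENCE s INCREMENT BY 2 MINVALUE 0 MAXVALUE 100 START WITH 10 CACHE 20
#
# would roughly populate SequenceProperties(increment=2, minvalue=0, maxvalue=100,
# start=10, cache=20), with each value held as a parsed term rather than a raw int.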
1876 seq = exp.SequenceProperties() 1877 1878 options = [] 1879 index = self._index 1880 1881 while self._curr: 1882 self._match(TokenType.COMMA) 1883 if self._match_text_seq("INCREMENT"): 1884 self._match_text_seq("BY") 1885 self._match_text_seq("=") 1886 seq.set("increment", self._parse_term()) 1887 elif self._match_text_seq("MINVALUE"): 1888 seq.set("minvalue", self._parse_term()) 1889 elif self._match_text_seq("MAXVALUE"): 1890 seq.set("maxvalue", self._parse_term()) 1891 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1892 self._match_text_seq("=") 1893 seq.set("start", self._parse_term()) 1894 elif self._match_text_seq("CACHE"): 1895 # T-SQL allows empty CACHE which is initialized dynamically 1896 seq.set("cache", self._parse_number() or True) 1897 elif self._match_text_seq("OWNED", "BY"): 1898 # "OWNED BY NONE" is the default 1899 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1900 else: 1901 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1902 if opt: 1903 options.append(opt) 1904 else: 1905 break 1906 1907 seq.set("options", options if options else None) 1908 return None if self._index == index else seq 1909 1910 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1911 # only used for teradata currently 1912 self._match(TokenType.COMMA) 1913 1914 kwargs = { 1915 "no": self._match_text_seq("NO"), 1916 "dual": self._match_text_seq("DUAL"), 1917 "before": self._match_text_seq("BEFORE"), 1918 "default": self._match_text_seq("DEFAULT"), 1919 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1920 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1921 "after": self._match_text_seq("AFTER"), 1922 "minimum": self._match_texts(("MIN", "MINIMUM")), 1923 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1924 } 1925 1926 if self._match_texts(self.PROPERTY_PARSERS): 1927 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1928 try: 1929 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1930 except TypeError: 1931 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1932 1933 return None 1934 1935 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1936 return self._parse_wrapped_csv(self._parse_property) 1937 1938 def _parse_property(self) -> t.Optional[exp.Expression]: 1939 if self._match_texts(self.PROPERTY_PARSERS): 1940 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1941 1942 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1943 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1944 1945 if self._match_text_seq("COMPOUND", "SORTKEY"): 1946 return self._parse_sortkey(compound=True) 1947 1948 if self._match_text_seq("SQL", "SECURITY"): 1949 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1950 1951 index = self._index 1952 key = self._parse_column() 1953 1954 if not self._match(TokenType.EQ): 1955 self._retreat(index) 1956 return self._parse_sequence_properties() 1957 1958 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1959 if isinstance(key, exp.Column): 1960 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1961 1962 value = self._parse_bitwise() or self._parse_var(any_token=True) 1963 1964 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1965 if isinstance(value, exp.Column): 1966 value = exp.var(value.name) 1967 1968 return 
self.expression(exp.Property, this=key, value=value) 1969 1970 def _parse_stored(self) -> exp.FileFormatProperty: 1971 self._match(TokenType.ALIAS) 1972 1973 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1974 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1975 1976 return self.expression( 1977 exp.FileFormatProperty, 1978 this=( 1979 self.expression( 1980 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1981 ) 1982 if input_format or output_format 1983 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1984 ), 1985 ) 1986 1987 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1988 field = self._parse_field() 1989 if isinstance(field, exp.Identifier) and not field.quoted: 1990 field = exp.var(field) 1991 1992 return field 1993 1994 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1995 self._match(TokenType.EQ) 1996 self._match(TokenType.ALIAS) 1997 1998 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1999 2000 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2001 properties = [] 2002 while True: 2003 if before: 2004 prop = self._parse_property_before() 2005 else: 2006 prop = self._parse_property() 2007 if not prop: 2008 break 2009 for p in ensure_list(prop): 2010 properties.append(p) 2011 2012 if properties: 2013 return self.expression(exp.Properties, expressions=properties) 2014 2015 return None 2016 2017 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2018 return self.expression( 2019 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2020 ) 2021 2022 def _parse_settings_property(self) -> exp.SettingsProperty: 2023 return self.expression( 2024 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2025 ) 2026 2027 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2028 if self._index >= 2: 2029 pre_volatile_token = self._tokens[self._index - 2] 2030 else: 2031 pre_volatile_token = None 2032 2033 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2034 return exp.VolatileProperty() 2035 2036 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2037 2038 def _parse_retention_period(self) -> exp.Var: 2039 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2040 number = self._parse_number() 2041 number_str = f"{number} " if number else "" 2042 unit = self._parse_var(any_token=True) 2043 return exp.var(f"{number_str}{unit}") 2044 2045 def _parse_system_versioning_property( 2046 self, with_: bool = False 2047 ) -> exp.WithSystemVersioningProperty: 2048 self._match(TokenType.EQ) 2049 prop = self.expression( 2050 exp.WithSystemVersioningProperty, 2051 **{ # type: ignore 2052 "on": True, 2053 "with": with_, 2054 }, 2055 ) 2056 2057 if self._match_text_seq("OFF"): 2058 prop.set("on", False) 2059 return prop 2060 2061 self._match(TokenType.ON) 2062 if self._match(TokenType.L_PAREN): 2063 while self._curr and not self._match(TokenType.R_PAREN): 2064 if self._match_text_seq("HISTORY_TABLE", "="): 2065 prop.set("this", self._parse_table_parts()) 2066 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2067 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2068 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2069 
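# Editorial example (not in the original source): this branch consumes the tail of a
# T-SQL clause such as SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.Hist,
# HISTORY_RETENTION_PERIOD = 3 MONTHS); see _parse_retention_period above.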
prop.set("retention_period", self._parse_retention_period()) 2070 2071 self._match(TokenType.COMMA) 2072 2073 return prop 2074 2075 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2076 self._match(TokenType.EQ) 2077 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2078 prop = self.expression(exp.DataDeletionProperty, on=on) 2079 2080 if self._match(TokenType.L_PAREN): 2081 while self._curr and not self._match(TokenType.R_PAREN): 2082 if self._match_text_seq("FILTER_COLUMN", "="): 2083 prop.set("filter_column", self._parse_column()) 2084 elif self._match_text_seq("RETENTION_PERIOD", "="): 2085 prop.set("retention_period", self._parse_retention_period()) 2086 2087 self._match(TokenType.COMMA) 2088 2089 return prop 2090 2091 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2092 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2093 prop = self._parse_system_versioning_property(with_=True) 2094 self._match_r_paren() 2095 return prop 2096 2097 if self._match(TokenType.L_PAREN, advance=False): 2098 return self._parse_wrapped_properties() 2099 2100 if self._match_text_seq("JOURNAL"): 2101 return self._parse_withjournaltable() 2102 2103 if self._match_texts(self.VIEW_ATTRIBUTES): 2104 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2105 2106 if self._match_text_seq("DATA"): 2107 return self._parse_withdata(no=False) 2108 elif self._match_text_seq("NO", "DATA"): 2109 return self._parse_withdata(no=True) 2110 2111 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2112 return self._parse_serde_properties(with_=True) 2113 2114 if self._match(TokenType.SCHEMA): 2115 return self.expression( 2116 exp.WithSchemaBindingProperty, 2117 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2118 ) 2119 2120 if not self._next: 2121 return None 2122 2123 return self._parse_withisolatedloading() 2124 2125 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2126 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2127 self._match(TokenType.EQ) 2128 2129 user = self._parse_id_var() 2130 self._match(TokenType.PARAMETER) 2131 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2132 2133 if not user or not host: 2134 return None 2135 2136 return exp.DefinerProperty(this=f"{user}@{host}") 2137 2138 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2139 self._match(TokenType.TABLE) 2140 self._match(TokenType.EQ) 2141 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2142 2143 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2144 return self.expression(exp.LogProperty, no=no) 2145 2146 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2147 return self.expression(exp.JournalProperty, **kwargs) 2148 2149 def _parse_checksum(self) -> exp.ChecksumProperty: 2150 self._match(TokenType.EQ) 2151 2152 on = None 2153 if self._match(TokenType.ON): 2154 on = True 2155 elif self._match_text_seq("OFF"): 2156 on = False 2157 2158 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2159 2160 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2161 return self.expression( 2162 exp.Cluster, 2163 expressions=( 2164 self._parse_wrapped_csv(self._parse_ordered) 2165 if wrapped 2166 else self._parse_csv(self._parse_ordered) 2167 ), 2168 ) 2169 2170 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2171 self._match_text_seq("BY") 2172 2173 
self._match_l_paren() 2174 expressions = self._parse_csv(self._parse_column) 2175 self._match_r_paren() 2176 2177 if self._match_text_seq("SORTED", "BY"): 2178 self._match_l_paren() 2179 sorted_by = self._parse_csv(self._parse_ordered) 2180 self._match_r_paren() 2181 else: 2182 sorted_by = None 2183 2184 self._match(TokenType.INTO) 2185 buckets = self._parse_number() 2186 self._match_text_seq("BUCKETS") 2187 2188 return self.expression( 2189 exp.ClusteredByProperty, 2190 expressions=expressions, 2191 sorted_by=sorted_by, 2192 buckets=buckets, 2193 ) 2194 2195 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2196 if not self._match_text_seq("GRANTS"): 2197 self._retreat(self._index - 1) 2198 return None 2199 2200 return self.expression(exp.CopyGrantsProperty) 2201 2202 def _parse_freespace(self) -> exp.FreespaceProperty: 2203 self._match(TokenType.EQ) 2204 return self.expression( 2205 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2206 ) 2207 2208 def _parse_mergeblockratio( 2209 self, no: bool = False, default: bool = False 2210 ) -> exp.MergeBlockRatioProperty: 2211 if self._match(TokenType.EQ): 2212 return self.expression( 2213 exp.MergeBlockRatioProperty, 2214 this=self._parse_number(), 2215 percent=self._match(TokenType.PERCENT), 2216 ) 2217 2218 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2219 2220 def _parse_datablocksize( 2221 self, 2222 default: t.Optional[bool] = None, 2223 minimum: t.Optional[bool] = None, 2224 maximum: t.Optional[bool] = None, 2225 ) -> exp.DataBlocksizeProperty: 2226 self._match(TokenType.EQ) 2227 size = self._parse_number() 2228 2229 units = None 2230 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2231 units = self._prev.text 2232 2233 return self.expression( 2234 exp.DataBlocksizeProperty, 2235 size=size, 2236 units=units, 2237 default=default, 2238 minimum=minimum, 2239 maximum=maximum, 2240 ) 2241 2242 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2243 self._match(TokenType.EQ) 2244 always = self._match_text_seq("ALWAYS") 2245 manual = self._match_text_seq("MANUAL") 2246 never = self._match_text_seq("NEVER") 2247 default = self._match_text_seq("DEFAULT") 2248 2249 autotemp = None 2250 if self._match_text_seq("AUTOTEMP"): 2251 autotemp = self._parse_schema() 2252 2253 return self.expression( 2254 exp.BlockCompressionProperty, 2255 always=always, 2256 manual=manual, 2257 never=never, 2258 default=default, 2259 autotemp=autotemp, 2260 ) 2261 2262 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2263 index = self._index 2264 no = self._match_text_seq("NO") 2265 concurrent = self._match_text_seq("CONCURRENT") 2266 2267 if not self._match_text_seq("ISOLATED", "LOADING"): 2268 self._retreat(index) 2269 return None 2270 2271 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2272 return self.expression( 2273 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2274 ) 2275 2276 def _parse_locking(self) -> exp.LockingProperty: 2277 if self._match(TokenType.TABLE): 2278 kind = "TABLE" 2279 elif self._match(TokenType.VIEW): 2280 kind = "VIEW" 2281 elif self._match(TokenType.ROW): 2282 kind = "ROW" 2283 elif self._match_text_seq("DATABASE"): 2284 kind = "DATABASE" 2285 else: 2286 kind = None 2287 2288 if kind in ("DATABASE", "TABLE", "VIEW"): 2289 this = self._parse_table_parts() 2290 else: 2291 this = None 2292 2293 if self._match(TokenType.FOR): 2294 
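# Editorial example (not in the original source): in a Teradata-style LOCKING
# modifier such as LOCKING TABLE t FOR ACCESS, this branch records whether the
# lock type was introduced by FOR or by IN.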
for_or_in = "FOR" 2295 elif self._match(TokenType.IN): 2296 for_or_in = "IN" 2297 else: 2298 for_or_in = None 2299 2300 if self._match_text_seq("ACCESS"): 2301 lock_type = "ACCESS" 2302 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2303 lock_type = "EXCLUSIVE" 2304 elif self._match_text_seq("SHARE"): 2305 lock_type = "SHARE" 2306 elif self._match_text_seq("READ"): 2307 lock_type = "READ" 2308 elif self._match_text_seq("WRITE"): 2309 lock_type = "WRITE" 2310 elif self._match_text_seq("CHECKSUM"): 2311 lock_type = "CHECKSUM" 2312 else: 2313 lock_type = None 2314 2315 override = self._match_text_seq("OVERRIDE") 2316 2317 return self.expression( 2318 exp.LockingProperty, 2319 this=this, 2320 kind=kind, 2321 for_or_in=for_or_in, 2322 lock_type=lock_type, 2323 override=override, 2324 ) 2325 2326 def _parse_partition_by(self) -> t.List[exp.Expression]: 2327 if self._match(TokenType.PARTITION_BY): 2328 return self._parse_csv(self._parse_assignment) 2329 return [] 2330 2331 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2332 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2333 if self._match_text_seq("MINVALUE"): 2334 return exp.var("MINVALUE") 2335 if self._match_text_seq("MAXVALUE"): 2336 return exp.var("MAXVALUE") 2337 return self._parse_bitwise() 2338 2339 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2340 expression = None 2341 from_expressions = None 2342 to_expressions = None 2343 2344 if self._match(TokenType.IN): 2345 this = self._parse_wrapped_csv(self._parse_bitwise) 2346 elif self._match(TokenType.FROM): 2347 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2348 self._match_text_seq("TO") 2349 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2350 elif self._match_text_seq("WITH", "(", "MODULUS"): 2351 this = self._parse_number() 2352 self._match_text_seq(",", "REMAINDER") 2353 expression = self._parse_number() 2354 self._match_r_paren() 2355 else: 2356 self.raise_error("Failed to parse partition bound spec.") 2357 2358 return self.expression( 2359 exp.PartitionBoundSpec, 2360 this=this, 2361 expression=expression, 2362 from_expressions=from_expressions, 2363 to_expressions=to_expressions, 2364 ) 2365 2366 # https://www.postgresql.org/docs/current/sql-createtable.html 2367 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2368 if not self._match_text_seq("OF"): 2369 self._retreat(self._index - 1) 2370 return None 2371 2372 this = self._parse_table(schema=True) 2373 2374 if self._match(TokenType.DEFAULT): 2375 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2376 elif self._match_text_seq("FOR", "VALUES"): 2377 expression = self._parse_partition_bound_spec() 2378 else: 2379 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2380 2381 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2382 2383 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2384 self._match(TokenType.EQ) 2385 return self.expression( 2386 exp.PartitionedByProperty, 2387 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2388 ) 2389 2390 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2391 if self._match_text_seq("AND", "STATISTICS"): 2392 statistics = True 2393 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2394 statistics = False 2395 else: 2396 statistics = None 2397 2398 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2399 2400 def 
_parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2401 if self._match_text_seq("SQL"): 2402 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2403 return None 2404 2405 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2406 if self._match_text_seq("SQL", "DATA"): 2407 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2408 return None 2409 2410 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2411 if self._match_text_seq("PRIMARY", "INDEX"): 2412 return exp.NoPrimaryIndexProperty() 2413 if self._match_text_seq("SQL"): 2414 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2415 return None 2416 2417 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2418 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2419 return exp.OnCommitProperty() 2420 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2421 return exp.OnCommitProperty(delete=True) 2422 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2423 2424 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2425 if self._match_text_seq("SQL", "DATA"): 2426 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2427 return None 2428 2429 def _parse_distkey(self) -> exp.DistKeyProperty: 2430 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2431 2432 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2433 table = self._parse_table(schema=True) 2434 2435 options = [] 2436 while self._match_texts(("INCLUDING", "EXCLUDING")): 2437 this = self._prev.text.upper() 2438 2439 id_var = self._parse_id_var() 2440 if not id_var: 2441 return None 2442 2443 options.append( 2444 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2445 ) 2446 2447 return self.expression(exp.LikeProperty, this=table, expressions=options) 2448 2449 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2450 return self.expression( 2451 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2452 ) 2453 2454 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2455 self._match(TokenType.EQ) 2456 return self.expression( 2457 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2458 ) 2459 2460 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2461 self._match_text_seq("WITH", "CONNECTION") 2462 return self.expression( 2463 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2464 ) 2465 2466 def _parse_returns(self) -> exp.ReturnsProperty: 2467 value: t.Optional[exp.Expression] 2468 null = None 2469 is_table = self._match(TokenType.TABLE) 2470 2471 if is_table: 2472 if self._match(TokenType.LT): 2473 value = self.expression( 2474 exp.Schema, 2475 this="TABLE", 2476 expressions=self._parse_csv(self._parse_struct_types), 2477 ) 2478 if not self._match(TokenType.GT): 2479 self.raise_error("Expecting >") 2480 else: 2481 value = self._parse_schema(exp.var("TABLE")) 2482 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2483 null = True 2484 value = None 2485 else: 2486 value = self._parse_types() 2487 2488 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2489 2490 def _parse_describe(self) -> exp.Describe: 2491 kind = self._match_set(self.CREATABLES) and self._prev.text 2492 style = self._match_texts(("EXTENDED", "FORMATTED", 
"HISTORY")) and self._prev.text.upper() 2493 if self._match(TokenType.DOT): 2494 style = None 2495 self._retreat(self._index - 2) 2496 this = self._parse_table(schema=True) 2497 properties = self._parse_properties() 2498 expressions = properties.expressions if properties else None 2499 return self.expression( 2500 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2501 ) 2502 2503 def _parse_insert(self) -> exp.Insert: 2504 comments = ensure_list(self._prev_comments) 2505 hint = self._parse_hint() 2506 overwrite = self._match(TokenType.OVERWRITE) 2507 ignore = self._match(TokenType.IGNORE) 2508 local = self._match_text_seq("LOCAL") 2509 alternative = None 2510 is_function = None 2511 2512 if self._match_text_seq("DIRECTORY"): 2513 this: t.Optional[exp.Expression] = self.expression( 2514 exp.Directory, 2515 this=self._parse_var_or_string(), 2516 local=local, 2517 row_format=self._parse_row_format(match_row=True), 2518 ) 2519 else: 2520 if self._match(TokenType.OR): 2521 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2522 2523 self._match(TokenType.INTO) 2524 comments += ensure_list(self._prev_comments) 2525 self._match(TokenType.TABLE) 2526 is_function = self._match(TokenType.FUNCTION) 2527 2528 this = ( 2529 self._parse_table(schema=True, parse_partition=True) 2530 if not is_function 2531 else self._parse_function() 2532 ) 2533 2534 returning = self._parse_returning() 2535 2536 return self.expression( 2537 exp.Insert, 2538 comments=comments, 2539 hint=hint, 2540 is_function=is_function, 2541 this=this, 2542 stored=self._match_text_seq("STORED") and self._parse_stored(), 2543 by_name=self._match_text_seq("BY", "NAME"), 2544 exists=self._parse_exists(), 2545 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2546 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2547 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2548 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2549 conflict=self._parse_on_conflict(), 2550 returning=returning or self._parse_returning(), 2551 overwrite=overwrite, 2552 alternative=alternative, 2553 ignore=ignore, 2554 ) 2555 2556 def _parse_kill(self) -> exp.Kill: 2557 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2558 2559 return self.expression( 2560 exp.Kill, 2561 this=self._parse_primary(), 2562 kind=kind, 2563 ) 2564 2565 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2566 conflict = self._match_text_seq("ON", "CONFLICT") 2567 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2568 2569 if not conflict and not duplicate: 2570 return None 2571 2572 conflict_keys = None 2573 constraint = None 2574 2575 if conflict: 2576 if self._match_text_seq("ON", "CONSTRAINT"): 2577 constraint = self._parse_id_var() 2578 elif self._match(TokenType.L_PAREN): 2579 conflict_keys = self._parse_csv(self._parse_id_var) 2580 self._match_r_paren() 2581 2582 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2583 if self._prev.token_type == TokenType.UPDATE: 2584 self._match(TokenType.SET) 2585 expressions = self._parse_csv(self._parse_equality) 2586 else: 2587 expressions = None 2588 2589 return self.expression( 2590 exp.OnConflict, 2591 duplicate=duplicate, 2592 expressions=expressions, 2593 action=action, 2594 conflict_keys=conflict_keys, 2595 constraint=constraint, 2596 ) 2597 2598 def _parse_returning(self) -> t.Optional[exp.Returning]: 
2599 if not self._match(TokenType.RETURNING): 2600 return None 2601 return self.expression( 2602 exp.Returning, 2603 expressions=self._parse_csv(self._parse_expression), 2604 into=self._match(TokenType.INTO) and self._parse_table_part(), 2605 ) 2606 2607 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2608 if not self._match(TokenType.FORMAT): 2609 return None 2610 return self._parse_row_format() 2611 2612 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2613 index = self._index 2614 with_ = with_ or self._match_text_seq("WITH") 2615 2616 if not self._match(TokenType.SERDE_PROPERTIES): 2617 self._retreat(index) 2618 return None 2619 return self.expression( 2620 exp.SerdeProperties, 2621 **{ # type: ignore 2622 "expressions": self._parse_wrapped_properties(), 2623 "with": with_, 2624 }, 2625 ) 2626 2627 def _parse_row_format( 2628 self, match_row: bool = False 2629 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2630 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2631 return None 2632 2633 if self._match_text_seq("SERDE"): 2634 this = self._parse_string() 2635 2636 serde_properties = self._parse_serde_properties() 2637 2638 return self.expression( 2639 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2640 ) 2641 2642 self._match_text_seq("DELIMITED") 2643 2644 kwargs = {} 2645 2646 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2647 kwargs["fields"] = self._parse_string() 2648 if self._match_text_seq("ESCAPED", "BY"): 2649 kwargs["escaped"] = self._parse_string() 2650 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2651 kwargs["collection_items"] = self._parse_string() 2652 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2653 kwargs["map_keys"] = self._parse_string() 2654 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2655 kwargs["lines"] = self._parse_string() 2656 if self._match_text_seq("NULL", "DEFINED", "AS"): 2657 kwargs["null"] = self._parse_string() 2658 2659 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2660 2661 def _parse_load(self) -> exp.LoadData | exp.Command: 2662 if self._match_text_seq("DATA"): 2663 local = self._match_text_seq("LOCAL") 2664 self._match_text_seq("INPATH") 2665 inpath = self._parse_string() 2666 overwrite = self._match(TokenType.OVERWRITE) 2667 self._match_pair(TokenType.INTO, TokenType.TABLE) 2668 2669 return self.expression( 2670 exp.LoadData, 2671 this=self._parse_table(schema=True), 2672 local=local, 2673 overwrite=overwrite, 2674 inpath=inpath, 2675 partition=self._parse_partition(), 2676 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2677 serde=self._match_text_seq("SERDE") and self._parse_string(), 2678 ) 2679 return self._parse_as_command(self._prev) 2680 2681 def _parse_delete(self) -> exp.Delete: 2682 # This handles MySQL's "Multiple-Table Syntax" 2683 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2684 tables = None 2685 comments = self._prev_comments 2686 if not self._match(TokenType.FROM, advance=False): 2687 tables = self._parse_csv(self._parse_table) or None 2688 2689 returning = self._parse_returning() 2690 2691 return self.expression( 2692 exp.Delete, 2693 comments=comments, 2694 tables=tables, 2695 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2696 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2697 
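# Editorial example (not in the original source): in Postgres'
# DELETE FROM a USING b WHERE a.id = b.id, table b lands in "using" above and
# the predicate in "where" below.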
where=self._parse_where(), 2698 returning=returning or self._parse_returning(), 2699 limit=self._parse_limit(), 2700 ) 2701 2702 def _parse_update(self) -> exp.Update: 2703 comments = self._prev_comments 2704 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2705 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2706 returning = self._parse_returning() 2707 return self.expression( 2708 exp.Update, 2709 comments=comments, 2710 **{ # type: ignore 2711 "this": this, 2712 "expressions": expressions, 2713 "from": self._parse_from(joins=True), 2714 "where": self._parse_where(), 2715 "returning": returning or self._parse_returning(), 2716 "order": self._parse_order(), 2717 "limit": self._parse_limit(), 2718 }, 2719 ) 2720 2721 def _parse_uncache(self) -> exp.Uncache: 2722 if not self._match(TokenType.TABLE): 2723 self.raise_error("Expecting TABLE after UNCACHE") 2724 2725 return self.expression( 2726 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2727 ) 2728 2729 def _parse_cache(self) -> exp.Cache: 2730 lazy = self._match_text_seq("LAZY") 2731 self._match(TokenType.TABLE) 2732 table = self._parse_table(schema=True) 2733 2734 options = [] 2735 if self._match_text_seq("OPTIONS"): 2736 self._match_l_paren() 2737 k = self._parse_string() 2738 self._match(TokenType.EQ) 2739 v = self._parse_string() 2740 options = [k, v] 2741 self._match_r_paren() 2742 2743 self._match(TokenType.ALIAS) 2744 return self.expression( 2745 exp.Cache, 2746 this=table, 2747 lazy=lazy, 2748 options=options, 2749 expression=self._parse_select(nested=True), 2750 ) 2751 2752 def _parse_partition(self) -> t.Optional[exp.Partition]: 2753 if not self._match(TokenType.PARTITION): 2754 return None 2755 2756 return self.expression( 2757 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2758 ) 2759 2760 def _parse_value(self) -> t.Optional[exp.Tuple]: 2761 if self._match(TokenType.L_PAREN): 2762 expressions = self._parse_csv(self._parse_expression) 2763 self._match_r_paren() 2764 return self.expression(exp.Tuple, expressions=expressions) 2765 2766 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
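# (Editorial illustration: each call wraps one bare scalar in its own
# single-expression Tuple, so VALUES 1, 2 yields two one-column rows.)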
2767 expression = self._parse_expression() 2768 if expression: 2769 return self.expression(exp.Tuple, expressions=[expression]) 2770 return None 2771 2772 def _parse_projections(self) -> t.List[exp.Expression]: 2773 return self._parse_expressions() 2774 2775 def _parse_select( 2776 self, 2777 nested: bool = False, 2778 table: bool = False, 2779 parse_subquery_alias: bool = True, 2780 parse_set_operation: bool = True, 2781 ) -> t.Optional[exp.Expression]: 2782 cte = self._parse_with() 2783 2784 if cte: 2785 this = self._parse_statement() 2786 2787 if not this: 2788 self.raise_error("Failed to parse any statement following CTE") 2789 return cte 2790 2791 if "with" in this.arg_types: 2792 this.set("with", cte) 2793 else: 2794 self.raise_error(f"{this.key} does not support CTE") 2795 this = cte 2796 2797 return this 2798 2799 # duckdb supports leading with FROM x 2800 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2801 2802 if self._match(TokenType.SELECT): 2803 comments = self._prev_comments 2804 2805 hint = self._parse_hint() 2806 2807 if self._next and not self._next.token_type == TokenType.DOT: 2808 all_ = self._match(TokenType.ALL) 2809 distinct = self._match_set(self.DISTINCT_TOKENS) 2810 else: 2811 all_, distinct = None, None 2812 2813 kind = ( 2814 self._match(TokenType.ALIAS) 2815 and self._match_texts(("STRUCT", "VALUE")) 2816 and self._prev.text.upper() 2817 ) 2818 2819 if distinct: 2820 distinct = self.expression( 2821 exp.Distinct, 2822 on=self._parse_value() if self._match(TokenType.ON) else None, 2823 ) 2824 2825 if all_ and distinct: 2826 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2827 2828 limit = self._parse_limit(top=True) 2829 projections = self._parse_projections() 2830 2831 this = self.expression( 2832 exp.Select, 2833 kind=kind, 2834 hint=hint, 2835 distinct=distinct, 2836 expressions=projections, 2837 limit=limit, 2838 ) 2839 this.comments = comments 2840 2841 into = self._parse_into() 2842 if into: 2843 this.set("into", into) 2844 2845 if not from_: 2846 from_ = self._parse_from() 2847 2848 if from_: 2849 this.set("from", from_) 2850 2851 this = self._parse_query_modifiers(this) 2852 elif (table or nested) and self._match(TokenType.L_PAREN): 2853 if self._match(TokenType.PIVOT): 2854 this = self._parse_simplified_pivot() 2855 elif self._match(TokenType.FROM): 2856 this = exp.select("*").from_( 2857 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2858 ) 2859 else: 2860 this = ( 2861 self._parse_table() 2862 if table 2863 else self._parse_select(nested=True, parse_set_operation=False) 2864 ) 2865 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2866 2867 self._match_r_paren() 2868 2869 # We return early here so that the UNION isn't attached to the subquery by the 2870 # following call to _parse_set_operations, but instead becomes the parent node 2871 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2872 elif self._match(TokenType.VALUES, advance=False): 2873 this = self._parse_derived_table_values() 2874 elif from_: 2875 this = exp.select("*").from_(from_.this, copy=False) 2876 elif self._match(TokenType.SUMMARIZE): 2877 table = self._match(TokenType.TABLE) 2878 this = self._parse_select() or self._parse_string() or self._parse_table() 2879 return self.expression(exp.Summarize, this=this, table=table) 2880 elif self._match(TokenType.DESCRIBE): 2881 this = self._parse_describe() 2882 elif self._match_text_seq("STREAM"): 2883 this = self.expression(exp.Stream, 
this=self._parse_function()) 2884 else: 2885 this = None 2886 2887 return self._parse_set_operations(this) if parse_set_operation else this 2888 2889 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2890 if not skip_with_token and not self._match(TokenType.WITH): 2891 return None 2892 2893 comments = self._prev_comments 2894 recursive = self._match(TokenType.RECURSIVE) 2895 2896 expressions = [] 2897 while True: 2898 expressions.append(self._parse_cte()) 2899 2900 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2901 break 2902 else: 2903 self._match(TokenType.WITH) 2904 2905 return self.expression( 2906 exp.With, comments=comments, expressions=expressions, recursive=recursive 2907 ) 2908 2909 def _parse_cte(self) -> exp.CTE: 2910 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2911 if not alias or not alias.this: 2912 self.raise_error("Expected CTE to have alias") 2913 2914 self._match(TokenType.ALIAS) 2915 comments = self._prev_comments 2916 2917 if self._match_text_seq("NOT", "MATERIALIZED"): 2918 materialized = False 2919 elif self._match_text_seq("MATERIALIZED"): 2920 materialized = True 2921 else: 2922 materialized = None 2923 2924 return self.expression( 2925 exp.CTE, 2926 this=self._parse_wrapped(self._parse_statement), 2927 alias=alias, 2928 materialized=materialized, 2929 comments=comments, 2930 ) 2931 2932 def _parse_table_alias( 2933 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2934 ) -> t.Optional[exp.TableAlias]: 2935 any_token = self._match(TokenType.ALIAS) 2936 alias = ( 2937 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2938 or self._parse_string_as_identifier() 2939 ) 2940 2941 index = self._index 2942 if self._match(TokenType.L_PAREN): 2943 columns = self._parse_csv(self._parse_function_parameter) 2944 self._match_r_paren() if columns else self._retreat(index) 2945 else: 2946 columns = None 2947 2948 if not alias and not columns: 2949 return None 2950 2951 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2952 2953 # We bubble up comments from the Identifier to the TableAlias 2954 if isinstance(alias, exp.Identifier): 2955 table_alias.add_comments(alias.pop_comments()) 2956 2957 return table_alias 2958 2959 def _parse_subquery( 2960 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2961 ) -> t.Optional[exp.Subquery]: 2962 if not this: 2963 return None 2964 2965 return self.expression( 2966 exp.Subquery, 2967 this=this, 2968 pivots=self._parse_pivots(), 2969 alias=self._parse_table_alias() if parse_alias else None, 2970 ) 2971 2972 def _implicit_unnests_to_explicit(self, this: E) -> E: 2973 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2974 2975 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2976 for i, join in enumerate(this.args.get("joins") or []): 2977 table = join.this 2978 normalized_table = table.copy() 2979 normalized_table.meta["maybe_column"] = True 2980 normalized_table = _norm(normalized_table, dialect=self.dialect) 2981 2982 if isinstance(table, exp.Table) and not join.args.get("on"): 2983 if normalized_table.parts[0].name in refs: 2984 table_as_column = table.to_column() 2985 unnest = exp.Unnest(expressions=[table_as_column]) 2986 2987 # Table.to_column creates a parent Alias node that we want to convert to 2988 # a TableAlias and attach to the Unnest, so it matches the parser's output 2989 if isinstance(table.args.get("alias"), 
exp.TableAlias): 2990 table_as_column.replace(table_as_column.this) 2991 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2992 2993 table.replace(unnest) 2994 2995 refs.add(normalized_table.alias_or_name) 2996 2997 return this 2998 2999 def _parse_query_modifiers( 3000 self, this: t.Optional[exp.Expression] 3001 ) -> t.Optional[exp.Expression]: 3002 if isinstance(this, (exp.Query, exp.Table)): 3003 for join in self._parse_joins(): 3004 this.append("joins", join) 3005 for lateral in iter(self._parse_lateral, None): 3006 this.append("laterals", lateral) 3007 3008 while True: 3009 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3010 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3011 key, expression = parser(self) 3012 3013 if expression: 3014 this.set(key, expression) 3015 if key == "limit": 3016 offset = expression.args.pop("offset", None) 3017 3018 if offset: 3019 offset = exp.Offset(expression=offset) 3020 this.set("offset", offset) 3021 3022 limit_by_expressions = expression.expressions 3023 expression.set("expressions", None) 3024 offset.set("expressions", limit_by_expressions) 3025 continue 3026 break 3027 3028 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3029 this = self._implicit_unnests_to_explicit(this) 3030 3031 return this 3032 3033 def _parse_hint(self) -> t.Optional[exp.Hint]: 3034 if self._match(TokenType.HINT): 3035 hints = [] 3036 for hint in iter( 3037 lambda: self._parse_csv( 3038 lambda: self._parse_function() or self._parse_var(upper=True) 3039 ), 3040 [], 3041 ): 3042 hints.extend(hint) 3043 3044 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3045 self.raise_error("Expected */ after HINT") 3046 3047 return self.expression(exp.Hint, expressions=hints) 3048 3049 return None 3050 3051 def _parse_into(self) -> t.Optional[exp.Into]: 3052 if not self._match(TokenType.INTO): 3053 return None 3054 3055 temp = self._match(TokenType.TEMPORARY) 3056 unlogged = self._match_text_seq("UNLOGGED") 3057 self._match(TokenType.TABLE) 3058 3059 return self.expression( 3060 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3061 ) 3062 3063 def _parse_from( 3064 self, joins: bool = False, skip_from_token: bool = False 3065 ) -> t.Optional[exp.From]: 3066 if not skip_from_token and not self._match(TokenType.FROM): 3067 return None 3068 3069 return self.expression( 3070 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3071 ) 3072 3073 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3074 return self.expression( 3075 exp.MatchRecognizeMeasure, 3076 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3077 this=self._parse_expression(), 3078 ) 3079 3080 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3081 if not self._match(TokenType.MATCH_RECOGNIZE): 3082 return None 3083 3084 self._match_l_paren() 3085 3086 partition = self._parse_partition_by() 3087 order = self._parse_order() 3088 3089 measures = ( 3090 self._parse_csv(self._parse_match_recognize_measure) 3091 if self._match_text_seq("MEASURES") 3092 else None 3093 ) 3094 3095 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3096 rows = exp.var("ONE ROW PER MATCH") 3097 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3098 text = "ALL ROWS PER MATCH" 3099 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3100 text += " SHOW EMPTY MATCHES" 3101 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3102 text += " 
OMIT EMPTY MATCHES" 3103 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3104 text += " WITH UNMATCHED ROWS" 3105 rows = exp.var(text) 3106 else: 3107 rows = None 3108 3109 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3110 text = "AFTER MATCH SKIP" 3111 if self._match_text_seq("PAST", "LAST", "ROW"): 3112 text += " PAST LAST ROW" 3113 elif self._match_text_seq("TO", "NEXT", "ROW"): 3114 text += " TO NEXT ROW" 3115 elif self._match_text_seq("TO", "FIRST"): 3116 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3117 elif self._match_text_seq("TO", "LAST"): 3118 text += f" TO LAST {self._advance_any().text}" # type: ignore 3119 after = exp.var(text) 3120 else: 3121 after = None 3122 3123 if self._match_text_seq("PATTERN"): 3124 self._match_l_paren() 3125 3126 if not self._curr: 3127 self.raise_error("Expecting )", self._curr) 3128 3129 paren = 1 3130 start = self._curr 3131 3132 while self._curr and paren > 0: 3133 if self._curr.token_type == TokenType.L_PAREN: 3134 paren += 1 3135 if self._curr.token_type == TokenType.R_PAREN: 3136 paren -= 1 3137 3138 end = self._prev 3139 self._advance() 3140 3141 if paren > 0: 3142 self.raise_error("Expecting )", self._curr) 3143 3144 pattern = exp.var(self._find_sql(start, end)) 3145 else: 3146 pattern = None 3147 3148 define = ( 3149 self._parse_csv(self._parse_name_as_expression) 3150 if self._match_text_seq("DEFINE") 3151 else None 3152 ) 3153 3154 self._match_r_paren() 3155 3156 return self.expression( 3157 exp.MatchRecognize, 3158 partition_by=partition, 3159 order=order, 3160 measures=measures, 3161 rows=rows, 3162 after=after, 3163 pattern=pattern, 3164 define=define, 3165 alias=self._parse_table_alias(), 3166 ) 3167 3168 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3169 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3170 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3171 cross_apply = False 3172 3173 if cross_apply is not None: 3174 this = self._parse_select(table=True) 3175 view = None 3176 outer = None 3177 elif self._match(TokenType.LATERAL): 3178 this = self._parse_select(table=True) 3179 view = self._match(TokenType.VIEW) 3180 outer = self._match(TokenType.OUTER) 3181 else: 3182 return None 3183 3184 if not this: 3185 this = ( 3186 self._parse_unnest() 3187 or self._parse_function() 3188 or self._parse_id_var(any_token=False) 3189 ) 3190 3191 while self._match(TokenType.DOT): 3192 this = exp.Dot( 3193 this=this, 3194 expression=self._parse_function() or self._parse_id_var(any_token=False), 3195 ) 3196 3197 if view: 3198 table = self._parse_id_var(any_token=False) 3199 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3200 table_alias: t.Optional[exp.TableAlias] = self.expression( 3201 exp.TableAlias, this=table, columns=columns 3202 ) 3203 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3204 # We move the alias from the lateral's child node to the lateral itself 3205 table_alias = this.args["alias"].pop() 3206 else: 3207 table_alias = self._parse_table_alias() 3208 3209 return self.expression( 3210 exp.Lateral, 3211 this=this, 3212 view=view, 3213 outer=outer, 3214 alias=table_alias, 3215 cross_apply=cross_apply, 3216 ) 3217 3218 def _parse_join_parts( 3219 self, 3220 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3221 return ( 3222 self._match_set(self.JOIN_METHODS) and self._prev, 3223 self._match_set(self.JOIN_SIDES) and self._prev, 3224 self._match_set(self.JOIN_KINDS) and 
self._prev, 3225 ) 3226 3227 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3228 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3229 this = self._parse_column() 3230 if isinstance(this, exp.Column): 3231 return this.this 3232 return this 3233 3234 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3235 3236 def _parse_join( 3237 self, skip_join_token: bool = False, parse_bracket: bool = False 3238 ) -> t.Optional[exp.Join]: 3239 if self._match(TokenType.COMMA): 3240 return self.expression(exp.Join, this=self._parse_table()) 3241 3242 index = self._index 3243 method, side, kind = self._parse_join_parts() 3244 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3245 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3246 3247 if not skip_join_token and not join: 3248 self._retreat(index) 3249 kind = None 3250 method = None 3251 side = None 3252 3253 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3254 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3255 3256 if not skip_join_token and not join and not outer_apply and not cross_apply: 3257 return None 3258 3259 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3260 3261 if method: 3262 kwargs["method"] = method.text 3263 if side: 3264 kwargs["side"] = side.text 3265 if kind: 3266 kwargs["kind"] = kind.text 3267 if hint: 3268 kwargs["hint"] = hint 3269 3270 if self._match(TokenType.MATCH_CONDITION): 3271 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3272 3273 if self._match(TokenType.ON): 3274 kwargs["on"] = self._parse_assignment() 3275 elif self._match(TokenType.USING): 3276 kwargs["using"] = self._parse_using_identifiers() 3277 elif ( 3278 not (outer_apply or cross_apply) 3279 and not isinstance(kwargs["this"], exp.Unnest) 3280 and not (kind and kind.token_type == TokenType.CROSS) 3281 ): 3282 index = self._index 3283 joins: t.Optional[list] = list(self._parse_joins()) 3284 3285 if joins and self._match(TokenType.ON): 3286 kwargs["on"] = self._parse_assignment() 3287 elif joins and self._match(TokenType.USING): 3288 kwargs["using"] = self._parse_using_identifiers() 3289 else: 3290 joins = None 3291 self._retreat(index) 3292 3293 kwargs["this"].set("joins", joins if joins else None) 3294 3295 comments = [c for token in (method, side, kind) if token for c in token.comments] 3296 return self.expression(exp.Join, comments=comments, **kwargs) 3297 3298 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3299 this = self._parse_assignment() 3300 3301 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3302 return this 3303 3304 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3305 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3306 3307 return this 3308 3309 def _parse_index_params(self) -> exp.IndexParameters: 3310 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3311 3312 if self._match(TokenType.L_PAREN, advance=False): 3313 columns = self._parse_wrapped_csv(self._parse_with_operator) 3314 else: 3315 columns = None 3316 3317 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3318 partition_by = self._parse_partition_by() 3319 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3320 tablespace = ( 3321 self._parse_var(any_token=True) 3322 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3323 else None 3324 ) 3325 where = self._parse_where() 3326 3327 on = self._parse_field() if self._match(TokenType.ON) else None 3328 3329 return self.expression( 3330 exp.IndexParameters, 3331 using=using, 3332 columns=columns, 3333 include=include, 3334 partition_by=partition_by, 3335 where=where, 3336 with_storage=with_storage, 3337 tablespace=tablespace, 3338 on=on, 3339 ) 3340 3341 def _parse_index( 3342 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3343 ) -> t.Optional[exp.Index]: 3344 if index or anonymous: 3345 unique = None 3346 primary = None 3347 amp = None 3348 3349 self._match(TokenType.ON) 3350 self._match(TokenType.TABLE) # hive 3351 table = self._parse_table_parts(schema=True) 3352 else: 3353 unique = self._match(TokenType.UNIQUE) 3354 primary = self._match_text_seq("PRIMARY") 3355 amp = self._match_text_seq("AMP") 3356 3357 if not self._match(TokenType.INDEX): 3358 return None 3359 3360 index = self._parse_id_var() 3361 table = None 3362 3363 params = self._parse_index_params() 3364 3365 return self.expression( 3366 exp.Index, 3367 this=index, 3368 table=table, 3369 unique=unique, 3370 primary=primary, 3371 amp=amp, 3372 params=params, 3373 ) 3374 3375 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3376 hints: t.List[exp.Expression] = [] 3377 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3378 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3379 hints.append( 3380 self.expression( 3381 exp.WithTableHint, 3382 expressions=self._parse_csv( 3383 lambda: self._parse_function() or self._parse_var(any_token=True) 3384 ), 3385 ) 3386 ) 3387 self._match_r_paren() 3388 else: 3389 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3390 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3391 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3392 3393 self._match_set((TokenType.INDEX, TokenType.KEY)) 3394 if self._match(TokenType.FOR): 3395 hint.set("target", self._advance_any() and self._prev.text.upper()) 3396 3397 hint.set("expressions", self._parse_wrapped_id_vars()) 3398 hints.append(hint) 3399 3400 return hints or None 3401 3402 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3403 return ( 3404 (not schema and self._parse_function(optional_parens=False)) 3405 or self._parse_id_var(any_token=False) 3406 or self._parse_string_as_identifier() 3407 or self._parse_placeholder() 3408 ) 3409 3410 def _parse_table_parts( 3411 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3412 ) -> exp.Table: 3413 catalog = None 3414 db = None 3415 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3416 3417 while self._match(TokenType.DOT): 3418 if catalog: 3419 # This allows nesting the table in arbitrarily many dot expressions if needed 3420 table = self.expression( 3421 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3422 ) 3423 else: 3424 catalog = db 3425 db = table 3426 # "" used for tsql FROM a..b case 3427 table = self._parse_table_part(schema=schema) or "" 3428 3429 if ( 3430 wildcard 3431 and self._is_connected() 3432 and (isinstance(table, exp.Identifier) or not table) 3433 and self._match(TokenType.STAR) 3434 ): 3435 if isinstance(table, exp.Identifier): 3436 table.args["this"] += "*" 3437 else: 3438 table = exp.Identifier(this="*") 3439 3440 # We bubble up comments from the Identifier to the Table 
3441 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3442 3443 if is_db_reference: 3444 catalog = db 3445 db = table 3446 table = None 3447 3448 if not table and not is_db_reference: 3449 self.raise_error(f"Expected table name but got {self._curr}") 3450 if not db and is_db_reference: 3451 self.raise_error(f"Expected database name but got {self._curr}") 3452 3453 table = self.expression( 3454 exp.Table, 3455 comments=comments, 3456 this=table, 3457 db=db, 3458 catalog=catalog, 3459 ) 3460 3461 changes = self._parse_changes() 3462 if changes: 3463 table.set("changes", changes) 3464 3465 at_before = self._parse_historical_data() 3466 if at_before: 3467 table.set("when", at_before) 3468 3469 pivots = self._parse_pivots() 3470 if pivots: 3471 table.set("pivots", pivots) 3472 3473 return table 3474 3475 def _parse_table( 3476 self, 3477 schema: bool = False, 3478 joins: bool = False, 3479 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3480 parse_bracket: bool = False, 3481 is_db_reference: bool = False, 3482 parse_partition: bool = False, 3483 ) -> t.Optional[exp.Expression]: 3484 lateral = self._parse_lateral() 3485 if lateral: 3486 return lateral 3487 3488 unnest = self._parse_unnest() 3489 if unnest: 3490 return unnest 3491 3492 values = self._parse_derived_table_values() 3493 if values: 3494 return values 3495 3496 subquery = self._parse_select(table=True) 3497 if subquery: 3498 if not subquery.args.get("pivots"): 3499 subquery.set("pivots", self._parse_pivots()) 3500 return subquery 3501 3502 bracket = parse_bracket and self._parse_bracket(None) 3503 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3504 3505 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3506 self._parse_table 3507 ) 3508 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3509 3510 only = self._match(TokenType.ONLY) 3511 3512 this = t.cast( 3513 exp.Expression, 3514 bracket 3515 or rows_from 3516 or self._parse_bracket( 3517 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3518 ), 3519 ) 3520 3521 if only: 3522 this.set("only", only) 3523 3524 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3525 self._match_text_seq("*") 3526 3527 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3528 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3529 this.set("partition", self._parse_partition()) 3530 3531 if schema: 3532 return self._parse_schema(this=this) 3533 3534 version = self._parse_version() 3535 3536 if version: 3537 this.set("version", version) 3538 3539 if self.dialect.ALIAS_POST_TABLESAMPLE: 3540 table_sample = self._parse_table_sample() 3541 3542 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3543 if alias: 3544 this.set("alias", alias) 3545 3546 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3547 return self.expression( 3548 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3549 ) 3550 3551 this.set("hints", self._parse_table_hints()) 3552 3553 if not this.args.get("pivots"): 3554 this.set("pivots", self._parse_pivots()) 3555 3556 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3557 table_sample = self._parse_table_sample() 3558 3559 if table_sample: 3560 table_sample.set("this", this) 3561 this = table_sample 3562 3563 if joins: 3564 for join in self._parse_joins(): 3565 this.append("joins", join) 3566 3567 if 
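
# Sketch of the dotted-name handling in _parse_table_parts: up to three
# parts map onto catalog, db and name; deeper paths nest into exp.Dot.
import sqlglot
from sqlglot import exp

table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
print(table.catalog, table.db, table.name)  # c d t
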
self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3568 this.set("ordinality", True) 3569 this.set("alias", self._parse_table_alias()) 3570 3571 return this 3572 3573 def _parse_version(self) -> t.Optional[exp.Version]: 3574 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3575 this = "TIMESTAMP" 3576 elif self._match(TokenType.VERSION_SNAPSHOT): 3577 this = "VERSION" 3578 else: 3579 return None 3580 3581 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3582 kind = self._prev.text.upper() 3583 start = self._parse_bitwise() 3584 self._match_texts(("TO", "AND")) 3585 end = self._parse_bitwise() 3586 expression: t.Optional[exp.Expression] = self.expression( 3587 exp.Tuple, expressions=[start, end] 3588 ) 3589 elif self._match_text_seq("CONTAINED", "IN"): 3590 kind = "CONTAINED IN" 3591 expression = self.expression( 3592 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3593 ) 3594 elif self._match(TokenType.ALL): 3595 kind = "ALL" 3596 expression = None 3597 else: 3598 self._match_text_seq("AS", "OF") 3599 kind = "AS OF" 3600 expression = self._parse_type() 3601 3602 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3603 3604 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3605 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3606 index = self._index 3607 historical_data = None 3608 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3609 this = self._prev.text.upper() 3610 kind = ( 3611 self._match(TokenType.L_PAREN) 3612 and self._match_texts(self.HISTORICAL_DATA_KIND) 3613 and self._prev.text.upper() 3614 ) 3615 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3616 3617 if expression: 3618 self._match_r_paren() 3619 historical_data = self.expression( 3620 exp.HistoricalData, this=this, kind=kind, expression=expression 3621 ) 3622 else: 3623 self._retreat(index) 3624 3625 return historical_data 3626 3627 def _parse_changes(self) -> t.Optional[exp.Changes]: 3628 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3629 return None 3630 3631 information = self._parse_var(any_token=True) 3632 self._match_r_paren() 3633 3634 return self.expression( 3635 exp.Changes, 3636 information=information, 3637 at_before=self._parse_historical_data(), 3638 end=self._parse_historical_data(), 3639 ) 3640 3641 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3642 if not self._match(TokenType.UNNEST): 3643 return None 3644 3645 expressions = self._parse_wrapped_csv(self._parse_equality) 3646 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3647 3648 alias = self._parse_table_alias() if with_alias else None 3649 3650 if alias: 3651 if self.dialect.UNNEST_COLUMN_ONLY: 3652 if alias.args.get("columns"): 3653 self.raise_error("Unexpected extra column alias in unnest.") 3654 3655 alias.set("columns", [alias.this]) 3656 alias.set("this", None) 3657 3658 columns = alias.args.get("columns") or [] 3659 if offset and len(expressions) < len(columns): 3660 offset = columns.pop() 3661 3662 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3663 self._match(TokenType.ALIAS) 3664 offset = self._parse_id_var( 3665 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3666 ) or exp.to_identifier("offset") 3667 3668 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3669 3670 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3671 is_derived = self._match_pair(TokenType.L_PAREN, 
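
# Sketch of _parse_unnest, including the BigQuery-style WITH OFFSET alias
# that is captured in the `offset` arg; the query is illustrative.
import sqlglot
from sqlglot import exp

unnest = sqlglot.parse_one(
    "SELECT * FROM UNNEST([1, 2]) AS n WITH OFFSET AS o", read="bigquery"
).find(exp.Unnest)
print(unnest.args["offset"])  # the `o` identifier
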
TokenType.VALUES) 3672 if not is_derived and not ( 3673 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3674 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3675 ): 3676 return None 3677 3678 expressions = self._parse_csv(self._parse_value) 3679 alias = self._parse_table_alias() 3680 3681 if is_derived: 3682 self._match_r_paren() 3683 3684 return self.expression( 3685 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3686 ) 3687 3688 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3689 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3690 as_modifier and self._match_text_seq("USING", "SAMPLE") 3691 ): 3692 return None 3693 3694 bucket_numerator = None 3695 bucket_denominator = None 3696 bucket_field = None 3697 percent = None 3698 size = None 3699 seed = None 3700 3701 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3702 matched_l_paren = self._match(TokenType.L_PAREN) 3703 3704 if self.TABLESAMPLE_CSV: 3705 num = None 3706 expressions = self._parse_csv(self._parse_primary) 3707 else: 3708 expressions = None 3709 num = ( 3710 self._parse_factor() 3711 if self._match(TokenType.NUMBER, advance=False) 3712 else self._parse_primary() or self._parse_placeholder() 3713 ) 3714 3715 if self._match_text_seq("BUCKET"): 3716 bucket_numerator = self._parse_number() 3717 self._match_text_seq("OUT", "OF") 3718 bucket_denominator = self._parse_number() 3719 self._match(TokenType.ON) 3720 bucket_field = self._parse_field() 3721 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3722 percent = num 3723 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3724 size = num 3725 else: 3726 percent = num 3727 3728 if matched_l_paren: 3729 self._match_r_paren() 3730 3731 if self._match(TokenType.L_PAREN): 3732 method = self._parse_var(upper=True) 3733 seed = self._match(TokenType.COMMA) and self._parse_number() 3734 self._match_r_paren() 3735 elif self._match_texts(("SEED", "REPEATABLE")): 3736 seed = self._parse_wrapped(self._parse_number) 3737 3738 if not method and self.DEFAULT_SAMPLING_METHOD: 3739 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3740 3741 return self.expression( 3742 exp.TableSample, 3743 expressions=expressions, 3744 method=method, 3745 bucket_numerator=bucket_numerator, 3746 bucket_denominator=bucket_denominator, 3747 bucket_field=bucket_field, 3748 percent=percent, 3749 size=size, 3750 seed=seed, 3751 ) 3752 3753 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3754 return list(iter(self._parse_pivot, None)) or None 3755 3756 def _parse_joins(self) -> t.Iterator[exp.Join]: 3757 return iter(self._parse_join, None) 3758 3759 # https://duckdb.org/docs/sql/statements/pivot 3760 def _parse_simplified_pivot(self) -> exp.Pivot: 3761 def _parse_on() -> t.Optional[exp.Expression]: 3762 this = self._parse_bitwise() 3763 return self._parse_in(this) if self._match(TokenType.IN) else this 3764 3765 this = self._parse_table() 3766 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3767 using = self._match(TokenType.USING) and self._parse_csv( 3768 lambda: self._parse_alias(self._parse_function()) 3769 ) 3770 group = self._parse_group() 3771 return self.expression( 3772 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3773 ) 3774 3775 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3776 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3777 this =
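
# Sketch of _parse_table_sample on a Postgres-style clause; depending on the
# dialect's TABLESAMPLE_SIZE_IS_PERCENT flag, the sampled amount lands in
# either the `percent` or the `size` arg.
import sqlglot
from sqlglot import exp

sample = sqlglot.parse_one(
    "SELECT * FROM t TABLESAMPLE BERNOULLI (20)", read="postgres"
).find(exp.TableSample)
print(sample.args.get("method"), sample.args.get("percent") or sample.args.get("size"))
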
self._parse_select_or_expression() 3778 3779 self._match(TokenType.ALIAS) 3780 alias = self._parse_field() 3781 if alias: 3782 return self.expression(exp.PivotAlias, this=this, alias=alias) 3783 3784 return this 3785 3786 value = self._parse_column() 3787 3788 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3789 self.raise_error("Expecting IN (") 3790 3791 if self._match(TokenType.ANY): 3792 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3793 else: 3794 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3795 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3796 3797 self._match_r_paren() 3798 return expr 3799 3800 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3801 index = self._index 3802 include_nulls = None 3803 3804 if self._match(TokenType.PIVOT): 3805 unpivot = False 3806 elif self._match(TokenType.UNPIVOT): 3807 unpivot = True 3808 3809 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3810 if self._match_text_seq("INCLUDE", "NULLS"): 3811 include_nulls = True 3812 elif self._match_text_seq("EXCLUDE", "NULLS"): 3813 include_nulls = False 3814 else: 3815 return None 3816 3817 expressions = [] 3818 3819 if not self._match(TokenType.L_PAREN): 3820 self._retreat(index) 3821 return None 3822 3823 if unpivot: 3824 expressions = self._parse_csv(self._parse_column) 3825 else: 3826 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3827 3828 if not expressions: 3829 self.raise_error("Failed to parse PIVOT's aggregation list") 3830 3831 if not self._match(TokenType.FOR): 3832 self.raise_error("Expecting FOR") 3833 3834 field = self._parse_pivot_in() 3835 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3836 self._parse_bitwise 3837 ) 3838 3839 self._match_r_paren() 3840 3841 pivot = self.expression( 3842 exp.Pivot, 3843 expressions=expressions, 3844 field=field, 3845 unpivot=unpivot, 3846 include_nulls=include_nulls, 3847 default_on_null=default_on_null, 3848 ) 3849 3850 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3851 pivot.set("alias", self._parse_table_alias()) 3852 3853 if not unpivot: 3854 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3855 3856 columns: t.List[exp.Expression] = [] 3857 for fld in pivot.args["field"].expressions: 3858 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3859 for name in names: 3860 if self.PREFIXED_PIVOT_COLUMNS: 3861 name = f"{name}_{field_name}" if name else field_name 3862 else: 3863 name = f"{field_name}_{name}" if name else field_name 3864 3865 columns.append(exp.to_identifier(name)) 3866 3867 pivot.set("columns", columns) 3868 3869 return pivot 3870 3871 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3872 return [agg.alias for agg in aggregations] 3873 3874 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3875 if not skip_where_token and not self._match(TokenType.PREWHERE): 3876 return None 3877 3878 return self.expression( 3879 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3880 ) 3881 3882 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3883 if not skip_where_token and not self._match(TokenType.WHERE): 3884 return None 3885 3886 return self.expression( 3887 exp.Where, comments=self._prev_comments, 
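
# Sketch of the PIVOT path above: the aggregation list, the FOR column and
# the IN values all land on a single exp.Pivot node; the query is made up.
import sqlglot
from sqlglot import exp

pivot = sqlglot.parse_one(
    "SELECT * FROM sales PIVOT(SUM(amount) FOR month IN ('jan', 'feb'))",
    read="snowflake",
).find(exp.Pivot)
print(pivot.args["expressions"], pivot.args["field"])
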
this=self._parse_assignment() 3888 ) 3889 3890 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3891 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3892 return None 3893 3894 elements: t.Dict[str, t.Any] = defaultdict(list) 3895 3896 if self._match(TokenType.ALL): 3897 elements["all"] = True 3898 elif self._match(TokenType.DISTINCT): 3899 elements["all"] = False 3900 3901 while True: 3902 expressions = self._parse_csv( 3903 lambda: None 3904 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3905 else self._parse_assignment() 3906 ) 3907 if expressions: 3908 elements["expressions"].extend(expressions) 3909 3910 grouping_sets = self._parse_grouping_sets() 3911 if grouping_sets: 3912 elements["grouping_sets"].extend(grouping_sets) 3913 3914 rollup = None 3915 cube = None 3916 totals = None 3917 3918 index = self._index 3919 with_ = self._match(TokenType.WITH) 3920 if self._match(TokenType.ROLLUP): 3921 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3922 elements["rollup"].extend(ensure_list(rollup)) 3923 3924 if self._match(TokenType.CUBE): 3925 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3926 elements["cube"].extend(ensure_list(cube)) 3927 3928 if self._match_text_seq("TOTALS"): 3929 totals = True 3930 elements["totals"] = True # type: ignore 3931 3932 if not (grouping_sets or rollup or cube or totals): 3933 if with_: 3934 self._retreat(index) 3935 break 3936 3937 return self.expression(exp.Group, **elements) # type: ignore 3938 3939 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3940 if not self._match(TokenType.GROUPING_SETS): 3941 return None 3942 3943 return self._parse_wrapped_csv(self._parse_grouping_set) 3944 3945 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3946 if self._match(TokenType.L_PAREN): 3947 grouping_set = self._parse_csv(self._parse_column) 3948 self._match_r_paren() 3949 return self.expression(exp.Tuple, expressions=grouping_set) 3950 3951 return self._parse_column() 3952 3953 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3954 if not skip_having_token and not self._match(TokenType.HAVING): 3955 return None 3956 return self.expression(exp.Having, this=self._parse_assignment()) 3957 3958 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3959 if not self._match(TokenType.QUALIFY): 3960 return None 3961 return self.expression(exp.Qualify, this=self._parse_assignment()) 3962 3963 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3964 if skip_start_token: 3965 start = None 3966 elif self._match(TokenType.START_WITH): 3967 start = self._parse_assignment() 3968 else: 3969 return None 3970 3971 self._match(TokenType.CONNECT_BY) 3972 nocycle = self._match_text_seq("NOCYCLE") 3973 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3974 exp.Prior, this=self._parse_bitwise() 3975 ) 3976 connect = self._parse_assignment() 3977 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3978 3979 if not start and self._match(TokenType.START_WITH): 3980 start = self._parse_assignment() 3981 3982 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3983 3984 def _parse_name_as_expression(self) -> exp.Alias: 3985 return self.expression( 3986 exp.Alias, 3987 alias=self._parse_id_var(any_token=True), 3988 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3989 ) 3990 3991 def _parse_interpolate(self) -> 
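
# Sketch of _parse_group: GROUPING SETS are collected separately from the
# plain grouping expressions on the resulting exp.Group node.
import sqlglot
from sqlglot import exp

group = sqlglot.parse_one(
    "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a))"
).find(exp.Group)
print(group.args["grouping_sets"])  # two exp.Tuple grouping sets
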
t.Optional[t.List[exp.Expression]]: 3992 if self._match_text_seq("INTERPOLATE"): 3993 return self._parse_wrapped_csv(self._parse_name_as_expression) 3994 return None 3995 3996 def _parse_order( 3997 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3998 ) -> t.Optional[exp.Expression]: 3999 siblings = None 4000 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4001 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4002 return this 4003 4004 siblings = True 4005 4006 return self.expression( 4007 exp.Order, 4008 this=this, 4009 expressions=self._parse_csv(self._parse_ordered), 4010 interpolate=self._parse_interpolate(), 4011 siblings=siblings, 4012 ) 4013 4014 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4015 if not self._match(token): 4016 return None 4017 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4018 4019 def _parse_ordered( 4020 self, parse_method: t.Optional[t.Callable] = None 4021 ) -> t.Optional[exp.Ordered]: 4022 this = parse_method() if parse_method else self._parse_assignment() 4023 if not this: 4024 return None 4025 4026 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4027 this = exp.var("ALL") 4028 4029 asc = self._match(TokenType.ASC) 4030 desc = self._match(TokenType.DESC) or (asc and False) 4031 4032 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4033 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4034 4035 nulls_first = is_nulls_first or False 4036 explicitly_null_ordered = is_nulls_first or is_nulls_last 4037 4038 if ( 4039 not explicitly_null_ordered 4040 and ( 4041 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4042 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4043 ) 4044 and self.dialect.NULL_ORDERING != "nulls_are_last" 4045 ): 4046 nulls_first = True 4047 4048 if self._match_text_seq("WITH", "FILL"): 4049 with_fill = self.expression( 4050 exp.WithFill, 4051 **{ # type: ignore 4052 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4053 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4054 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4055 }, 4056 ) 4057 else: 4058 with_fill = None 4059 4060 return self.expression( 4061 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4062 ) 4063 4064 def _parse_limit( 4065 self, 4066 this: t.Optional[exp.Expression] = None, 4067 top: bool = False, 4068 skip_limit_token: bool = False, 4069 ) -> t.Optional[exp.Expression]: 4070 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4071 comments = self._prev_comments 4072 if top: 4073 limit_paren = self._match(TokenType.L_PAREN) 4074 expression = self._parse_term() if limit_paren else self._parse_number() 4075 4076 if limit_paren: 4077 self._match_r_paren() 4078 else: 4079 expression = self._parse_term() 4080 4081 if self._match(TokenType.COMMA): 4082 offset = expression 4083 expression = self._parse_term() 4084 else: 4085 offset = None 4086 4087 limit_exp = self.expression( 4088 exp.Limit, 4089 this=this, 4090 expression=expression, 4091 offset=offset, 4092 comments=comments, 4093 expressions=self._parse_limit_by(), 4094 ) 4095 4096 return limit_exp 4097 4098 if self._match(TokenType.FETCH): 4099 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4100 direction = self._prev.text.upper() if direction else "FIRST" 4101 4102 count = self._parse_field(tokens=self.FETCH_TOKENS) 4103 percent = 
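
# Sketch of _parse_ordered: explicit ASC/DESC and NULLS FIRST/LAST become
# flags on exp.Ordered; when unspecified, nulls_first is derived from the
# dialect's NULL_ORDERING as in the branch above.
import sqlglot
from sqlglot import exp

ordered = sqlglot.parse_one("SELECT * FROM t ORDER BY x DESC NULLS FIRST").find(exp.Ordered)
assert ordered.args["desc"] is True and ordered.args["nulls_first"] is True
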
self._match(TokenType.PERCENT) 4104 4105 self._match_set((TokenType.ROW, TokenType.ROWS)) 4106 4107 only = self._match_text_seq("ONLY") 4108 with_ties = self._match_text_seq("WITH", "TIES") 4109 4110 if only and with_ties: 4111 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4112 4113 return self.expression( 4114 exp.Fetch, 4115 direction=direction, 4116 count=count, 4117 percent=percent, 4118 with_ties=with_ties, 4119 ) 4120 4121 return this 4122 4123 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4124 if not self._match(TokenType.OFFSET): 4125 return this 4126 4127 count = self._parse_term() 4128 self._match_set((TokenType.ROW, TokenType.ROWS)) 4129 4130 return self.expression( 4131 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4132 ) 4133 4134 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4135 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4136 4137 def _parse_locks(self) -> t.List[exp.Lock]: 4138 locks = [] 4139 while True: 4140 if self._match_text_seq("FOR", "UPDATE"): 4141 update = True 4142 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4143 "LOCK", "IN", "SHARE", "MODE" 4144 ): 4145 update = False 4146 else: 4147 break 4148 4149 expressions = None 4150 if self._match_text_seq("OF"): 4151 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4152 4153 wait: t.Optional[bool | exp.Expression] = None 4154 if self._match_text_seq("NOWAIT"): 4155 wait = True 4156 elif self._match_text_seq("WAIT"): 4157 wait = self._parse_primary() 4158 elif self._match_text_seq("SKIP", "LOCKED"): 4159 wait = False 4160 4161 locks.append( 4162 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4163 ) 4164 4165 return locks 4166 4167 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4168 while this and self._match_set(self.SET_OPERATIONS): 4169 token_type = self._prev.token_type 4170 4171 if token_type == TokenType.UNION: 4172 operation: t.Type[exp.SetOperation] = exp.Union 4173 elif token_type == TokenType.EXCEPT: 4174 operation = exp.Except 4175 else: 4176 operation = exp.Intersect 4177 4178 comments = self._prev.comments 4179 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4180 by_name = self._match_text_seq("BY", "NAME") 4181 expression = self._parse_select(nested=True, parse_set_operation=False) 4182 4183 this = self.expression( 4184 operation, 4185 comments=comments, 4186 this=this, 4187 distinct=distinct, 4188 by_name=by_name, 4189 expression=expression, 4190 ) 4191 4192 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4193 expression = this.expression 4194 4195 if expression: 4196 for arg in self.SET_OP_MODIFIERS: 4197 expr = expression.args.get(arg) 4198 if expr: 4199 this.set(arg, expr.pop()) 4200 4201 return this 4202 4203 def _parse_expression(self) -> t.Optional[exp.Expression]: 4204 return self._parse_alias(self._parse_assignment()) 4205 4206 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4207 this = self._parse_disjunction() 4208 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4209 # This allows us to parse <non-identifier token> := <expr> 4210 this = exp.column( 4211 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4212 ) 4213 4214 while self._match_set(self.ASSIGNMENT): 4215 this = self.expression( 4216 
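
# Sketch of _parse_set_operations: a bare UNION is distinct, UNION ALL is
# not, mirroring the `distinct` computation above.
import sqlglot

union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
union_all = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert union.args["distinct"] is True and union_all.args["distinct"] is False
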
self.ASSIGNMENT[self._prev.token_type], 4217 this=this, 4218 comments=self._prev_comments, 4219 expression=self._parse_assignment(), 4220 ) 4221 4222 return this 4223 4224 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4225 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4226 4227 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4228 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4229 4230 def _parse_equality(self) -> t.Optional[exp.Expression]: 4231 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4232 4233 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4234 return self._parse_tokens(self._parse_range, self.COMPARISON) 4235 4236 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4237 this = this or self._parse_bitwise() 4238 negate = self._match(TokenType.NOT) 4239 4240 if self._match_set(self.RANGE_PARSERS): 4241 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4242 if not expression: 4243 return this 4244 4245 this = expression 4246 elif self._match(TokenType.ISNULL): 4247 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4248 4249 # Postgres supports ISNULL and NOTNULL for conditions. 4250 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4251 if self._match(TokenType.NOTNULL): 4252 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4253 this = self.expression(exp.Not, this=this) 4254 4255 if negate: 4256 this = self._negate_range(this) 4257 4258 if self._match(TokenType.IS): 4259 this = self._parse_is(this) 4260 4261 return this 4262 4263 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4264 if not this: 4265 return this 4266 4267 return self.expression(exp.Not, this=this) 4268 4269 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4270 index = self._index - 1 4271 negate = self._match(TokenType.NOT) 4272 4273 if self._match_text_seq("DISTINCT", "FROM"): 4274 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4275 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4276 4277 expression = self._parse_null() or self._parse_boolean() 4278 if not expression: 4279 self._retreat(index) 4280 return None 4281 4282 this = self.expression(exp.Is, this=this, expression=expression) 4283 return self.expression(exp.Not, this=this) if negate else this 4284 4285 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4286 unnest = self._parse_unnest(with_alias=False) 4287 if unnest: 4288 this = self.expression(exp.In, this=this, unnest=unnest) 4289 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4290 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4291 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4292 4293 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4294 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4295 else: 4296 this = self.expression(exp.In, this=this, expressions=expressions) 4297 4298 if matched_l_paren: 4299 self._match_r_paren(this) 4300 elif not self._match(TokenType.R_BRACKET, expression=this): 4301 self.raise_error("Expecting ]") 4302 else: 4303 this = self.expression(exp.In, this=this, field=self._parse_field()) 4304 4305 return this 4306 4307 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4308 low = 
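
# Sketch of _parse_is: DISTINCT FROM comparisons become the null-safe
# operators chosen in the branch above.
import sqlglot
from sqlglot import exp

assert sqlglot.parse_one("SELECT a IS DISTINCT FROM b FROM t").find(exp.NullSafeNEQ)
assert sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b FROM t").find(exp.NullSafeEQ)
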
self._parse_bitwise() 4309 self._match(TokenType.AND) 4310 high = self._parse_bitwise() 4311 return self.expression(exp.Between, this=this, low=low, high=high) 4312 4313 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4314 if not self._match(TokenType.ESCAPE): 4315 return this 4316 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4317 4318 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4319 index = self._index 4320 4321 if not self._match(TokenType.INTERVAL) and match_interval: 4322 return None 4323 4324 if self._match(TokenType.STRING, advance=False): 4325 this = self._parse_primary() 4326 else: 4327 this = self._parse_term() 4328 4329 if not this or ( 4330 isinstance(this, exp.Column) 4331 and not this.table 4332 and not this.this.quoted 4333 and this.name.upper() == "IS" 4334 ): 4335 self._retreat(index) 4336 return None 4337 4338 unit = self._parse_function() or ( 4339 not self._match(TokenType.ALIAS, advance=False) 4340 and self._parse_var(any_token=True, upper=True) 4341 ) 4342 4343 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4344 # each INTERVAL expression into this canonical form so it's easy to transpile 4345 if this and this.is_number: 4346 this = exp.Literal.string(this.to_py()) 4347 elif this and this.is_string: 4348 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4349 if len(parts) == 1: 4350 if unit: 4351 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4352 self._retreat(self._index - 1) 4353 4354 this = exp.Literal.string(parts[0][0]) 4355 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4356 4357 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4358 unit = self.expression( 4359 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4360 ) 4361 4362 interval = self.expression(exp.Interval, this=this, unit=unit) 4363 4364 index = self._index 4365 self._match(TokenType.PLUS) 4366 4367 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4368 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4369 return self.expression( 4370 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4371 ) 4372 4373 self._retreat(index) 4374 return interval 4375 4376 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4377 this = self._parse_term() 4378 4379 while True: 4380 if self._match_set(self.BITWISE): 4381 this = self.expression( 4382 self.BITWISE[self._prev.token_type], 4383 this=this, 4384 expression=self._parse_term(), 4385 ) 4386 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4387 this = self.expression( 4388 exp.DPipe, 4389 this=this, 4390 expression=self._parse_term(), 4391 safe=not self.dialect.STRICT_STRING_CONCAT, 4392 ) 4393 elif self._match(TokenType.DQMARK): 4394 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4395 elif self._match_pair(TokenType.LT, TokenType.LT): 4396 this = self.expression( 4397 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4398 ) 4399 elif self._match_pair(TokenType.GT, TokenType.GT): 4400 this = self.expression( 4401 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4402 ) 4403 else: 4404 break 4405 4406 return this 4407 4408 def _parse_term(self) -> t.Optional[exp.Expression]: 4409 this = self._parse_factor() 4410 4411 while self._match_set(self.TERM): 4412 klass = self.TERM[self._prev.token_type] 4413 comments = self._prev_comments 4414 expression = self._parse_factor() 4415 4416 this = self.expression(klass, this=this, comments=comments, expression=expression) 4417 4418 if isinstance(this, exp.Collate): 4419 expr = this.expression 4420 4421 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4422 # fallback to Identifier / Var 4423 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4424 ident = expr.this 4425 if isinstance(ident, exp.Identifier): 4426 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4427 4428 return this 4429 4430 def _parse_factor(self) -> t.Optional[exp.Expression]: 4431 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4432 this = parse_method() 4433 4434 while self._match_set(self.FACTOR): 4435 klass = self.FACTOR[self._prev.token_type] 4436 comments = self._prev_comments 4437 expression = parse_method() 4438 4439 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4440 self._retreat(self._index - 1) 4441 return this 4442 4443 this = self.expression(klass, this=this, comments=comments, expression=expression) 4444 4445 if isinstance(this, exp.Div): 4446 this.args["typed"] = self.dialect.TYPED_DIVISION 4447 this.args["safe"] = self.dialect.SAFE_DIVISION 4448 4449 return this 4450 4451 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4452 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4453 4454 def _parse_unary(self) -> t.Optional[exp.Expression]: 4455 if self._match_set(self.UNARY_PARSERS): 4456 return self.UNARY_PARSERS[self._prev.token_type](self) 4457 return self._parse_at_time_zone(self._parse_type()) 4458 4459 def _parse_type( 4460 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4461 ) -> t.Optional[exp.Expression]: 4462 interval = parse_interval and self._parse_interval() 4463 if interval: 4464 return interval 4465 4466 index = self._index 4467 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4468 4469 # parse_types() 
returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4470 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4471 if isinstance(data_type, exp.Cast): 4472 # This constructor can contain ops directly after it, for instance struct unnesting: 4473 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4474 return self._parse_column_ops(data_type) 4475 4476 if data_type: 4477 index2 = self._index 4478 this = self._parse_primary() 4479 4480 if isinstance(this, exp.Literal): 4481 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4482 if parser: 4483 return parser(self, this, data_type) 4484 4485 return self.expression(exp.Cast, this=this, to=data_type) 4486 4487 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4488 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4489 # 4490 # If the index difference here is greater than 1, that means the parser itself must have 4491 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4492 # 4493 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4494 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4495 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4496 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4497 # 4498 # In these cases, we don't really want to return the converted type, but instead retreat 4499 # and try to parse a Column or Identifier in the section below. 4500 if data_type.expressions and index2 - index > 1: 4501 self._retreat(index2) 4502 return self._parse_column_ops(data_type) 4503 4504 self._retreat(index) 4505 4506 if fallback_to_identifier: 4507 return self._parse_id_var() 4508 4509 this = self._parse_column() 4510 return this and self._parse_column_ops(this) 4511 4512 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4513 this = self._parse_type() 4514 if not this: 4515 return None 4516 4517 if isinstance(this, exp.Column) and not this.table: 4518 this = exp.var(this.name.upper()) 4519 4520 return self.expression( 4521 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4522 ) 4523 4524 def _parse_types( 4525 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4526 ) -> t.Optional[exp.Expression]: 4527 index = self._index 4528 4529 this: t.Optional[exp.Expression] = None 4530 prefix = self._match_text_seq("SYSUDTLIB", ".") 4531 4532 if not self._match_set(self.TYPE_TOKENS): 4533 identifier = allow_identifiers and self._parse_id_var( 4534 any_token=False, tokens=(TokenType.VAR,) 4535 ) 4536 if isinstance(identifier, exp.Identifier): 4537 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4538 4539 if len(tokens) != 1: 4540 self.raise_error("Unexpected identifier", self._prev) 4541 4542 if tokens[0].token_type in self.TYPE_TOKENS: 4543 self._prev = tokens[0] 4544 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4545 type_name = identifier.name 4546 4547 while self._match(TokenType.DOT): 4548 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4549 4550 this = exp.DataType.build(type_name, udt=True) 4551 else: 4552 self._retreat(self._index - 1) 4553 return None 4554 else: 4555 return None 4556 4557 type_token = self._prev.token_type 4558 4559 if type_token == TokenType.PSEUDO_TYPE: 4560 return
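
# Sketch of the precision/scale handling just described: the parenthesized
# params end up in `expressions` on the resulting exp.DataType.
import sqlglot
from sqlglot import exp

dt = sqlglot.parse_one("CREATE TABLE t (x DECIMAL(38, 0))").find(exp.DataType)
assert dt.is_type("decimal") and len(dt.expressions) == 2
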
self.expression(exp.PseudoType, this=self._prev.text.upper()) 4561 4562 if type_token == TokenType.OBJECT_IDENTIFIER: 4563 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4564 4565 # https://materialize.com/docs/sql/types/map/ 4566 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4567 key_type = self._parse_types( 4568 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4569 ) 4570 if not self._match(TokenType.FARROW): 4571 self._retreat(index) 4572 return None 4573 4574 value_type = self._parse_types( 4575 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4576 ) 4577 if not self._match(TokenType.R_BRACKET): 4578 self._retreat(index) 4579 return None 4580 4581 return exp.DataType( 4582 this=exp.DataType.Type.MAP, 4583 expressions=[key_type, value_type], 4584 nested=True, 4585 prefix=prefix, 4586 ) 4587 4588 nested = type_token in self.NESTED_TYPE_TOKENS 4589 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4590 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4591 expressions = None 4592 maybe_func = False 4593 4594 if self._match(TokenType.L_PAREN): 4595 if is_struct: 4596 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4597 elif nested: 4598 expressions = self._parse_csv( 4599 lambda: self._parse_types( 4600 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4601 ) 4602 ) 4603 elif type_token in self.ENUM_TYPE_TOKENS: 4604 expressions = self._parse_csv(self._parse_equality) 4605 elif is_aggregate: 4606 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4607 any_token=False, tokens=(TokenType.VAR,) 4608 ) 4609 if not func_or_ident or not self._match(TokenType.COMMA): 4610 return None 4611 expressions = self._parse_csv( 4612 lambda: self._parse_types( 4613 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4614 ) 4615 ) 4616 expressions.insert(0, func_or_ident) 4617 else: 4618 expressions = self._parse_csv(self._parse_type_size) 4619 4620 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4621 if type_token == TokenType.VECTOR and len(expressions) == 2: 4622 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4623 4624 if not expressions or not self._match(TokenType.R_PAREN): 4625 self._retreat(index) 4626 return None 4627 4628 maybe_func = True 4629 4630 values: t.Optional[t.List[exp.Expression]] = None 4631 4632 if nested and self._match(TokenType.LT): 4633 if is_struct: 4634 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4635 else: 4636 expressions = self._parse_csv( 4637 lambda: self._parse_types( 4638 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4639 ) 4640 ) 4641 4642 if not self._match(TokenType.GT): 4643 self.raise_error("Expecting >") 4644 4645 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4646 values = self._parse_csv(self._parse_assignment) 4647 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4648 4649 if type_token in self.TIMESTAMPS: 4650 if self._match_text_seq("WITH", "TIME", "ZONE"): 4651 maybe_func = False 4652 tz_type = ( 4653 exp.DataType.Type.TIMETZ 4654 if type_token in self.TIMES 4655 else exp.DataType.Type.TIMESTAMPTZ 4656 ) 4657 this = exp.DataType(this=tz_type, expressions=expressions) 4658 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4659 maybe_func = False 4660 this = 
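
# Sketch of the nested-type branch above: angle-bracketed STRUCT fields
# parse into a DataType whose expressions are the member column defs.
import sqlglot
from sqlglot import exp

dt = sqlglot.parse_one(
    "SELECT CAST(x AS STRUCT<a INT64, b STRING>)", read="bigquery"
).find(exp.DataType)
assert dt.is_type("struct") and len(dt.expressions) == 2
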
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4661 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4662 maybe_func = False 4663 elif type_token == TokenType.INTERVAL: 4664 unit = self._parse_var(upper=True) 4665 if unit: 4666 if self._match_text_seq("TO"): 4667 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4668 4669 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4670 else: 4671 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4672 4673 if maybe_func and check_func: 4674 index2 = self._index 4675 peek = self._parse_string() 4676 4677 if not peek: 4678 self._retreat(index) 4679 return None 4680 4681 self._retreat(index2) 4682 4683 if not this: 4684 if self._match_text_seq("UNSIGNED"): 4685 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4686 if not unsigned_type_token: 4687 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4688 4689 type_token = unsigned_type_token or type_token 4690 4691 this = exp.DataType( 4692 this=exp.DataType.Type[type_token.value], 4693 expressions=expressions, 4694 nested=nested, 4695 prefix=prefix, 4696 ) 4697 4698 # Empty arrays/structs are allowed 4699 if values is not None: 4700 cls = exp.Struct if is_struct else exp.Array 4701 this = exp.cast(cls(expressions=values), this, copy=False) 4702 4703 elif expressions: 4704 this.set("expressions", expressions) 4705 4706 # https://materialize.com/docs/sql/types/list/#type-name 4707 while self._match(TokenType.LIST): 4708 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4709 4710 index = self._index 4711 4712 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4713 matched_array = self._match(TokenType.ARRAY) 4714 4715 while self._curr: 4716 datatype_token = self._prev.token_type 4717 matched_l_bracket = self._match(TokenType.L_BRACKET) 4718 if not matched_l_bracket and not matched_array: 4719 break 4720 4721 matched_array = False 4722 values = self._parse_csv(self._parse_assignment) or None 4723 if ( 4724 values 4725 and not schema 4726 and ( 4727 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4728 ) 4729 ): 4730 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4731 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4732 self._retreat(index) 4733 break 4734 4735 this = exp.DataType( 4736 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4737 ) 4738 self._match(TokenType.R_BRACKET) 4739 4740 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4741 converter = self.TYPE_CONVERTERS.get(this.this) 4742 if converter: 4743 this = converter(t.cast(exp.DataType, this)) 4744 4745 return this 4746 4747 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4748 index = self._index 4749 4750 if ( 4751 self._curr 4752 and self._next 4753 and self._curr.token_type in self.TYPE_TOKENS 4754 and self._next.token_type in self.TYPE_TOKENS 4755 ): 4756 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4757 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4758 this = self._parse_id_var() 4759 else: 4760 this = ( 4761 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4762 or self._parse_id_var() 4763 ) 4764 4765 self._match(TokenType.COLON) 4766 4767 if ( 4768 type_required 4769 and not isinstance(this, exp.DataType) 4770 and not self._match_set(self.TYPE_TOKENS, advance=False) 4771 ): 4772 self._retreat(index) 4773 return self._parse_types() 4774 4775 return self._parse_column_def(this) 4776 4777 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4778 if not self._match_text_seq("AT", "TIME", "ZONE"): 4779 return this 4780 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4781 4782 def _parse_column(self) -> t.Optional[exp.Expression]: 4783 this = self._parse_column_reference() 4784 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4785 4786 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4787 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4788 4789 return column 4790 4791 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4792 this = self._parse_field() 4793 if ( 4794 not this 4795 and self._match(TokenType.VALUES, advance=False) 4796 and self.VALUES_FOLLOWED_BY_PAREN 4797 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4798 ): 4799 this = self._parse_id_var() 4800 4801 if isinstance(this, exp.Identifier): 4802 # We bubble up comments from the Identifier to the Column 4803 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4804 4805 return this 4806 4807 def _parse_colon_as_variant_extract( 4808 self, this: t.Optional[exp.Expression] 4809 ) -> t.Optional[exp.Expression]: 4810 casts = [] 4811 json_path = [] 4812 4813 while self._match(TokenType.COLON): 4814 start_index = self._index 4815 4816 # Snowflake allows reserved keywords as json keys; since _advance_any() excludes TokenType.SELECT even when any_token=True, it is passed via `tokens` explicitly 4817 path = self._parse_column_ops( 4818 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4819 ) 4820 4821 # The cast :: operator has a lower precedence than the extraction operator :, so 4822 # we rearrange the AST appropriately to avoid casting the JSON path 4823 while isinstance(path, exp.Cast): 4824 casts.append(path.to) 4825 path = path.this 4826 4827 if casts: 4828 dcolon_offset = next( 4829 i 4830 for i, t in enumerate(self._tokens[start_index:]) 4831 if t.token_type == TokenType.DCOLON 4832 ) 4833 end_token = self._tokens[start_index + dcolon_offset - 1] 4834 else: 4835 end_token = self._prev 4836 4837 if path: 4838 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4839 4840 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4841 # Databricks transforms it back to the colon/dot notation 4842 if json_path: 4843 this = self.expression( 4844 exp.JSONExtract, 4845 this=this, 4846 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4847 variant_extract=True, 4848 ) 4849 4850 while casts: 4851 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4852 4853 return this 4854 4855 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4856 return self._parse_types() 4857 4858 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4859 this = self._parse_bracket(this) 4860 4861 while
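
# Sketch of the Snowflake variant-extract path above: the `:` chain is
# folded into one JSON path and the trailing cast is re-applied on top.
import sqlglot
from sqlglot import exp

cast = sqlglot.parse_one("SELECT v:a.b::VARCHAR FROM t", read="snowflake").find(exp.Cast)
assert isinstance(cast.this, exp.JSONExtract)
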
self._match_set(self.COLUMN_OPERATORS): 4862 op_token = self._prev.token_type 4863 op = self.COLUMN_OPERATORS.get(op_token) 4864 4865 if op_token == TokenType.DCOLON: 4866 field = self._parse_dcolon() 4867 if not field: 4868 self.raise_error("Expected type") 4869 elif op and self._curr: 4870 field = self._parse_column_reference() 4871 else: 4872 field = self._parse_field(any_token=True, anonymous_func=True) 4873 4874 if isinstance(field, exp.Func) and this: 4875 # bigquery allows function calls like x.y.count(...) 4876 # SAFE.SUBSTR(...) 4877 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4878 this = exp.replace_tree( 4879 this, 4880 lambda n: ( 4881 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4882 if n.table 4883 else n.this 4884 ) 4885 if isinstance(n, exp.Column) 4886 else n, 4887 ) 4888 4889 if op: 4890 this = op(self, this, field) 4891 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4892 this = self.expression( 4893 exp.Column, 4894 this=field, 4895 table=this.this, 4896 db=this.args.get("table"), 4897 catalog=this.args.get("db"), 4898 ) 4899 else: 4900 this = self.expression(exp.Dot, this=this, expression=field) 4901 4902 this = self._parse_bracket(this) 4903 4904 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4905 4906 def _parse_primary(self) -> t.Optional[exp.Expression]: 4907 if self._match_set(self.PRIMARY_PARSERS): 4908 token_type = self._prev.token_type 4909 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4910 4911 if token_type == TokenType.STRING: 4912 expressions = [primary] 4913 while self._match(TokenType.STRING): 4914 expressions.append(exp.Literal.string(self._prev.text)) 4915 4916 if len(expressions) > 1: 4917 return self.expression(exp.Concat, expressions=expressions) 4918 4919 return primary 4920 4921 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4922 return exp.Literal.number(f"0.{self._prev.text}") 4923 4924 if self._match(TokenType.L_PAREN): 4925 comments = self._prev_comments 4926 query = self._parse_select() 4927 4928 if query: 4929 expressions = [query] 4930 else: 4931 expressions = self._parse_expressions() 4932 4933 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4934 4935 if not this and self._match(TokenType.R_PAREN, advance=False): 4936 this = self.expression(exp.Tuple) 4937 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4938 this = self._parse_subquery(this=this, parse_alias=False) 4939 elif isinstance(this, exp.Subquery): 4940 this = self._parse_subquery( 4941 this=self._parse_set_operations(this), parse_alias=False 4942 ) 4943 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4944 this = self.expression(exp.Tuple, expressions=expressions) 4945 else: 4946 this = self.expression(exp.Paren, this=this) 4947 4948 if this: 4949 this.add_comments(comments) 4950 4951 self._match_r_paren(expression=this) 4952 return this 4953 4954 return None 4955 4956 def _parse_field( 4957 self, 4958 any_token: bool = False, 4959 tokens: t.Optional[t.Collection[TokenType]] = None, 4960 anonymous_func: bool = False, 4961 ) -> t.Optional[exp.Expression]: 4962 if anonymous_func: 4963 field = ( 4964 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4965 or self._parse_primary() 4966 ) 4967 else: 4968 field = self._parse_primary() or self._parse_function( 4969 anonymous=anonymous_func, any_token=any_token 4970 ) 4971 return field or 
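
# Sketch of the adjacent-string rule in _parse_primary: consecutive string
# literals fold into a single exp.Concat.
import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("SELECT 'foo' 'bar'")
assert isinstance(select.selects[0], exp.Concat)
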
self._parse_id_var(any_token=any_token, tokens=tokens) 4972 4973 def _parse_function( 4974 self, 4975 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4976 anonymous: bool = False, 4977 optional_parens: bool = True, 4978 any_token: bool = False, 4979 ) -> t.Optional[exp.Expression]: 4980 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4981 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4982 fn_syntax = False 4983 if ( 4984 self._match(TokenType.L_BRACE, advance=False) 4985 and self._next 4986 and self._next.text.upper() == "FN" 4987 ): 4988 self._advance(2) 4989 fn_syntax = True 4990 4991 func = self._parse_function_call( 4992 functions=functions, 4993 anonymous=anonymous, 4994 optional_parens=optional_parens, 4995 any_token=any_token, 4996 ) 4997 4998 if fn_syntax: 4999 self._match(TokenType.R_BRACE) 5000 5001 return func 5002 5003 def _parse_function_call( 5004 self, 5005 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5006 anonymous: bool = False, 5007 optional_parens: bool = True, 5008 any_token: bool = False, 5009 ) -> t.Optional[exp.Expression]: 5010 if not self._curr: 5011 return None 5012 5013 comments = self._curr.comments 5014 token_type = self._curr.token_type 5015 this = self._curr.text 5016 upper = this.upper() 5017 5018 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5019 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5020 self._advance() 5021 return self._parse_window(parser(self)) 5022 5023 if not self._next or self._next.token_type != TokenType.L_PAREN: 5024 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5025 self._advance() 5026 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5027 5028 return None 5029 5030 if any_token: 5031 if token_type in self.RESERVED_TOKENS: 5032 return None 5033 elif token_type not in self.FUNC_TOKENS: 5034 return None 5035 5036 self._advance(2) 5037 5038 parser = self.FUNCTION_PARSERS.get(upper) 5039 if parser and not anonymous: 5040 this = parser(self) 5041 else: 5042 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5043 5044 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5045 this = self.expression(subquery_predicate, this=self._parse_select()) 5046 self._match_r_paren() 5047 return this 5048 5049 if functions is None: 5050 functions = self.FUNCTIONS 5051 5052 function = functions.get(upper) 5053 5054 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5055 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5056 5057 if alias: 5058 args = self._kv_to_prop_eq(args) 5059 5060 if function and not anonymous: 5061 if "dialect" in function.__code__.co_varnames: 5062 func = function(args, dialect=self.dialect) 5063 else: 5064 func = function(args) 5065 5066 func = self.validate_expression(func, args) 5067 if not self.dialect.NORMALIZE_FUNCTIONS: 5068 func.meta["name"] = this 5069 5070 this = func 5071 else: 5072 if token_type == TokenType.IDENTIFIER: 5073 this = exp.Identifier(this=this, quoted=True) 5074 this = self.expression(exp.Anonymous, this=this, expressions=args) 5075 5076 if isinstance(this, exp.Expression): 5077 this.add_comments(comments) 5078 5079 self._match_r_paren(this) 5080 return self._parse_window(this) 5081 5082 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5083 transformed = [] 5084 5085 for e in expressions: 5086 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5087 if isinstance(e, 
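
# Sketch of the ODBC-style {fn ...} escape handled above: the wrapper is
# consumed during parsing, so it does not survive a round-trip.
import sqlglot

print(sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="snowflake").sql())
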
exp.Alias): 5088 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5089 5090 if not isinstance(e, exp.PropertyEQ): 5091 e = self.expression( 5092 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5093 ) 5094 5095 if isinstance(e.this, exp.Column): 5096 e.this.replace(e.this.this) 5097 5098 transformed.append(e) 5099 5100 return transformed 5101 5102 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5103 return self._parse_column_def(self._parse_id_var()) 5104 5105 def _parse_user_defined_function( 5106 self, kind: t.Optional[TokenType] = None 5107 ) -> t.Optional[exp.Expression]: 5108 this = self._parse_id_var() 5109 5110 while self._match(TokenType.DOT): 5111 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5112 5113 if not self._match(TokenType.L_PAREN): 5114 return this 5115 5116 expressions = self._parse_csv(self._parse_function_parameter) 5117 self._match_r_paren() 5118 return self.expression( 5119 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5120 ) 5121 5122 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5123 literal = self._parse_primary() 5124 if literal: 5125 return self.expression(exp.Introducer, this=token.text, expression=literal) 5126 5127 return self.expression(exp.Identifier, this=token.text) 5128 5129 def _parse_session_parameter(self) -> exp.SessionParameter: 5130 kind = None 5131 this = self._parse_id_var() or self._parse_primary() 5132 5133 if this and self._match(TokenType.DOT): 5134 kind = this.name 5135 this = self._parse_var() or self._parse_primary() 5136 5137 return self.expression(exp.SessionParameter, this=this, kind=kind) 5138 5139 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5140 return self._parse_id_var() 5141 5142 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5143 index = self._index 5144 5145 if self._match(TokenType.L_PAREN): 5146 expressions = t.cast( 5147 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5148 ) 5149 5150 if not self._match(TokenType.R_PAREN): 5151 self._retreat(index) 5152 else: 5153 expressions = [self._parse_lambda_arg()] 5154 5155 if self._match_set(self.LAMBDAS): 5156 return self.LAMBDAS[self._prev.token_type](self, expressions) 5157 5158 self._retreat(index) 5159 5160 this: t.Optional[exp.Expression] 5161 5162 if self._match(TokenType.DISTINCT): 5163 this = self.expression( 5164 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5165 ) 5166 else: 5167 this = self._parse_select_or_expression(alias=alias) 5168 5169 return self._parse_limit( 5170 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5171 ) 5172 5173 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5174 index = self._index 5175 if not self._match(TokenType.L_PAREN): 5176 return this 5177 5178 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5179 # expr can be of both types 5180 if self._match_set(self.SELECT_START_TOKENS): 5181 self._retreat(index) 5182 return this 5183 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5184 self._match_r_paren() 5185 return self.expression(exp.Schema, this=this, expressions=args) 5186 5187 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5188 return self._parse_column_def(self._parse_field(any_token=True)) 5189 5190 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5191 # column defs are not really columns, they're identifiers 5192 if isinstance(this, exp.Column): 5193 this = this.this 5194 5195 kind = self._parse_types(schema=True) 5196 5197 if self._match_text_seq("FOR", "ORDINALITY"): 5198 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5199 5200 constraints: t.List[exp.Expression] = [] 5201 5202 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5203 ("ALIAS", "MATERIALIZED") 5204 ): 5205 persisted = self._prev.text.upper() == "MATERIALIZED" 5206 constraints.append( 5207 self.expression( 5208 exp.ComputedColumnConstraint, 5209 this=self._parse_assignment(), 5210 persisted=persisted or self._match_text_seq("PERSISTED"), 5211 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5212 ) 5213 ) 5214 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5215 self._match(TokenType.ALIAS) 5216 constraints.append( 5217 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5218 ) 5219 5220 while True: 5221 constraint = self._parse_column_constraint() 5222 if not constraint: 5223 break 5224 constraints.append(constraint) 5225 5226 if not kind and not constraints: 5227 return this 5228 5229 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5230 5231 def _parse_auto_increment( 5232 self, 5233 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5234 start = None 5235 increment = None 5236 5237 if self._match(TokenType.L_PAREN, advance=False): 5238 args = self._parse_wrapped_csv(self._parse_bitwise) 5239 start = seq_get(args, 0) 5240 increment = seq_get(args, 1) 5241 elif self._match_text_seq("START"): 5242 start = self._parse_bitwise() 5243 self._match_text_seq("INCREMENT") 5244 increment = self._parse_bitwise() 5245 5246 if start and increment: 5247 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5248 5249 return exp.AutoIncrementColumnConstraint() 5250 5251 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5252 if not self._match_text_seq("REFRESH"): 5253 self._retreat(self._index - 1) 5254 return None 5255 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5256 5257 def _parse_compress(self) -> exp.CompressColumnConstraint: 5258 if self._match(TokenType.L_PAREN, advance=False): 5259 return self.expression( 5260 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5261 ) 5262 5263 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5264 5265 def _parse_generated_as_identity( 5266 self, 5267 ) -> ( 5268 exp.GeneratedAsIdentityColumnConstraint 5269 | exp.ComputedColumnConstraint 5270 | exp.GeneratedAsRowColumnConstraint 5271 ): 5272 if self._match_text_seq("BY", "DEFAULT"): 5273 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5274 this = self.expression( 5275 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5276 ) 5277 else: 5278 self._match_text_seq("ALWAYS") 5279 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5280 5281 self._match(TokenType.ALIAS) 5282 5283 if self._match_text_seq("ROW"): 5284 start = self._match_text_seq("START") 5285 if not start: 5286 self._match(TokenType.END) 5287 hidden = self._match_text_seq("HIDDEN") 5288 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5289 5290 identity = self._match_text_seq("IDENTITY") 5291 5292 if self._match(TokenType.L_PAREN): 5293 if self._match(TokenType.START_WITH): 5294 this.set("start", self._parse_bitwise()) 5295 if self._match_text_seq("INCREMENT", "BY"): 5296 this.set("increment", self._parse_bitwise()) 5297 if self._match_text_seq("MINVALUE"): 5298 this.set("minvalue", self._parse_bitwise()) 5299 if self._match_text_seq("MAXVALUE"): 5300 this.set("maxvalue", self._parse_bitwise()) 5301 5302 if self._match_text_seq("CYCLE"): 5303 this.set("cycle", True) 5304 elif self._match_text_seq("NO", "CYCLE"): 5305 this.set("cycle", False) 5306 5307 if not identity: 5308 this.set("expression", self._parse_range()) 5309 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5310 args = self._parse_csv(self._parse_bitwise) 5311 this.set("start", seq_get(args, 0)) 5312 this.set("increment", seq_get(args, 1)) 5313 5314 self._match_r_paren() 5315 5316 return this 5317 5318 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5319 self._match_text_seq("LENGTH") 5320 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5321 5322 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5323 if self._match_text_seq("NULL"): 5324 return self.expression(exp.NotNullColumnConstraint) 5325 if self._match_text_seq("CASESPECIFIC"): 5326 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5327 if self._match_text_seq("FOR", "REPLICATION"): 5328 return self.expression(exp.NotForReplicationColumnConstraint) 5329 return None 5330 5331 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5332 if self._match(TokenType.CONSTRAINT): 5333 this = self._parse_id_var() 5334 else: 5335 this = None 5336 5337 if self._match_texts(self.CONSTRAINT_PARSERS): 5338 return self.expression( 5339 exp.ColumnConstraint, 5340 this=this, 5341 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5342 ) 5343 5344 return this 5345 5346 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5347 if not self._match(TokenType.CONSTRAINT): 5348 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5349 5350 return self.expression( 5351 exp.Constraint, 5352 this=self._parse_id_var(), 5353 expressions=self._parse_unnamed_constraints(), 5354 ) 5355 5356 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5357 constraints = [] 5358 while True: 5359 constraint = self._parse_unnamed_constraint() or self._parse_function() 5360 if not constraint: 5361 break 5362 constraints.append(constraint) 5363 5364 return constraints 5365 5366 def _parse_unnamed_constraint( 5367 self, constraints: t.Optional[t.Collection[str]] = None 5368 ) -> t.Optional[exp.Expression]: 5369 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5370 constraints or self.CONSTRAINT_PARSERS 5371 ): 5372 return None 5373 5374 constraint = self._prev.text.upper() 5375 if constraint not in self.CONSTRAINT_PARSERS: 5376 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5377 5378 return self.CONSTRAINT_PARSERS[constraint](self) 5379 5380 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5381 return self._parse_id_var(any_token=False) 5382 5383 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5384 self._match_text_seq("KEY") 5385 return self.expression( 5386 exp.UniqueColumnConstraint, 5387 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5388 this=self._parse_schema(self._parse_unique_key()), 5389 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5390 on_conflict=self._parse_on_conflict(), 5391 ) 5392 5393 def _parse_key_constraint_options(self) -> t.List[str]: 5394 options = [] 5395 while True: 5396 if not self._curr: 5397 break 5398 5399 if self._match(TokenType.ON): 5400 action = None 5401 on = self._advance_any() and self._prev.text 5402 5403 if self._match_text_seq("NO", "ACTION"): 5404 action = "NO ACTION" 5405 elif self._match_text_seq("CASCADE"): 5406 action = "CASCADE" 5407 elif self._match_text_seq("RESTRICT"): 5408 action = "RESTRICT" 5409 elif self._match_pair(TokenType.SET, TokenType.NULL): 5410 action = "SET NULL" 5411 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5412 action = "SET DEFAULT" 5413 else: 5414 self.raise_error("Invalid key constraint") 5415 5416 options.append(f"ON {on} {action}") 5417 else: 5418 var = self._parse_var_from_options( 5419 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5420 ) 5421 if not var: 5422 break 5423 options.append(var.name) 5424 5425 return options 5426 5427 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5428 if match and not self._match(TokenType.REFERENCES): 5429 return None 5430 5431 expressions = None 5432 this = self._parse_table(schema=True) 5433 options = self._parse_key_constraint_options() 5434 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5435 5436 def _parse_foreign_key(self) -> exp.ForeignKey: 5437 expressions = self._parse_wrapped_id_vars() 5438 reference = self._parse_references() 5439 options = {} 5440 5441 while self._match(TokenType.ON): 5442 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5443 self.raise_error("Expected DELETE or UPDATE") 5444 5445 kind = self._prev.text.lower() 5446 5447 if self._match_text_seq("NO", "ACTION"): 5448 action = "NO ACTION" 5449 elif self._match(TokenType.SET): 5450 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5451 action = "SET " + self._prev.text.upper() 5452 else: 5453 self._advance() 5454 action = self._prev.text.upper() 5455 5456 options[kind] = action 5457 5458 return self.expression( 5459 exp.ForeignKey, 5460 expressions=expressions, 5461 reference=reference, 5462 **options, # type: ignore 5463 ) 5464 5465 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5466 return self._parse_field() 5467 5468 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5469 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5470 self._retreat(self._index - 1) 5471 return None 5472 5473 id_vars = self._parse_wrapped_id_vars() 5474 return self.expression( 5475 exp.PeriodForSystemTimeConstraint, 5476 this=seq_get(id_vars, 0), 5477 expression=seq_get(id_vars, 1), 5478 ) 5479 5480 def _parse_primary_key( 5481 self, wrapped_optional: bool = False, in_props: bool = False 5482 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5483 desc = ( 5484 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5485 and self._prev.token_type == TokenType.DESC 5486 ) 5487 5488 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5489 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5490 5491 expressions = self._parse_wrapped_csv( 5492 self._parse_primary_key_part, optional=wrapped_optional 5493 ) 5494 options = self._parse_key_constraint_options() 5495 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5496 5497 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5498 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5499 5500 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5501 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5502 return this 5503 5504 bracket_kind = self._prev.token_type 5505 expressions = self._parse_csv( 5506 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5507 ) 5508 5509 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5510 self.raise_error("Expected ]") 5511 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5512 self.raise_error("Expected }") 5513 5514 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5515 if bracket_kind == TokenType.L_BRACE: 5516 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5517 elif not this: 5518 this = build_array_constructor( 5519 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5520 ) 5521 else: 5522 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5523 if constructor_type: 5524 return build_array_constructor( 5525 constructor_type, 5526 args=expressions, 5527 bracket_kind=bracket_kind, 5528 dialect=self.dialect, 5529 ) 5530 5531 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5532 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5533 5534 self._add_comments(this) 5535 return self._parse_bracket(this) 5536 5537 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5538 if self._match(TokenType.COLON): 5539 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5540 return this 5541 5542 def _parse_case(self) -> t.Optional[exp.Expression]: 5543 ifs = [] 5544 default = None 5545 5546 comments = self._prev_comments 5547 expression = self._parse_assignment() 5548 5549 while self._match(TokenType.WHEN): 5550 this = self._parse_assignment() 5551 self._match(TokenType.THEN) 5552 then = self._parse_assignment() 5553 ifs.append(self.expression(exp.If, this=this, true=then)) 5554 5555 if self._match(TokenType.ELSE): 5556 default = self._parse_assignment() 5557 5558 if not self._match(TokenType.END): 5559 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5560 default = exp.column("interval") 5561 else: 5562 self.raise_error("Expected END after CASE", self._prev) 5563 5564 return self.expression( 5565 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5566 ) 5567 5568 def _parse_if(self) -> t.Optional[exp.Expression]: 5569 if self._match(TokenType.L_PAREN): 5570 args = self._parse_csv(self._parse_assignment) 5571 this = self.validate_expression(exp.If.from_arg_list(args), args) 5572 self._match_r_paren() 5573 else: 5574 index = self._index - 1 5575 5576 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5577 return self._parse_as_command(self._prev) 5578 5579 condition = self._parse_assignment() 5580 5581 if not condition: 5582 self._retreat(index) 5583 return None 5584 5585 self._match(TokenType.THEN) 5586 true = self._parse_assignment() 5587 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5588 self._match(TokenType.END) 5589 this = self.expression(exp.If, this=condition, true=true, false=false) 5590 5591 return this 5592 5593 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5594 if not self._match_text_seq("VALUE", "FOR"): 5595 self._retreat(self._index - 1) 5596 return None 5597 5598 return self.expression( 5599 exp.NextValueFor, 5600 this=self._parse_column(), 5601 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5602 ) 5603 5604 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5605 this = self._parse_function() or self._parse_var_or_string(upper=True) 5606 5607 if self._match(TokenType.FROM): 5608 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5609 5610 if not self._match(TokenType.COMMA): 5611 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5612 5613 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5614 5615 def _parse_gap_fill(self) -> exp.GapFill: 5616 self._match(TokenType.TABLE) 5617 this = self._parse_table() 5618 5619 self._match(TokenType.COMMA) 5620 args = [this, *self._parse_csv(self._parse_lambda)] 5621 5622 gap_fill = exp.GapFill.from_arg_list(args) 5623 return self.validate_expression(gap_fill, args) 5624 5625 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5626 this = self._parse_assignment() 5627 5628 if not self._match(TokenType.ALIAS): 5629 if self._match(TokenType.COMMA): 5630 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5631 5632 self.raise_error("Expected AS after CAST") 5633 5634 fmt = None 5635 to = self._parse_types() 5636 5637 if self._match(TokenType.FORMAT): 5638 fmt_string = self._parse_string() 5639 fmt = self._parse_at_time_zone(fmt_string) 5640 5641 if not to: 5642 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5643 if to.this in exp.DataType.TEMPORAL_TYPES: 5644 this = self.expression( 5645 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5646 this=this, 5647 format=exp.Literal.string( 5648 format_time( 5649 fmt_string.this if fmt_string else "", 5650 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5651 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5652 ) 5653 ), 5654 safe=safe, 5655 ) 5656 5657 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5658 this.set("zone", fmt.args["zone"]) 5659 return this 5660 elif not to: 5661 self.raise_error("Expected TYPE after CAST") 5662 elif isinstance(to, exp.Identifier): 5663 to = exp.DataType.build(to.name, udt=True) 5664 elif to.this == exp.DataType.Type.CHAR: 5665 if self._match(TokenType.CHARACTER_SET): 5666 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5667 5668 return self.expression( 5669 exp.Cast if strict else exp.TryCast, 5670 this=this, 5671 to=to, 5672 format=fmt, 5673 safe=safe, 5674 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5675 ) 5676 5677 def _parse_string_agg(self) -> exp.Expression: 5678 if self._match(TokenType.DISTINCT): 5679 args: t.List[t.Optional[exp.Expression]] = [ 5680 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5681 ] 5682 if self._match(TokenType.COMMA): 5683 args.extend(self._parse_csv(self._parse_assignment)) 5684 else: 5685 args = self._parse_csv(self._parse_assignment) # type: ignore 5686 5687 index = self._index 5688 if not self._match(TokenType.R_PAREN) and args: 5689 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5690 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5691 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5692 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5693 5694 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5695 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5696 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5697 if not self._match_text_seq("WITHIN", "GROUP"): 5698 self._retreat(index) 5699 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5700 5701 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5702 order = self._parse_order(this=seq_get(args, 0)) 5703 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5704 5705 def _parse_convert( 5706 self, strict: bool, safe: t.Optional[bool] = None 5707 ) -> t.Optional[exp.Expression]: 5708 this = self._parse_bitwise() 5709 5710 if self._match(TokenType.USING): 5711 to: t.Optional[exp.Expression] = self.expression( 5712 exp.CharacterSet, this=self._parse_var() 5713 ) 5714 elif self._match(TokenType.COMMA): 5715 to = self._parse_types() 5716 else: 5717 to = None 5718 5719 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5720 5721 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5722 """ 5723 There are generally two variants of the DECODE function: 5724 5725 - DECODE(bin, charset) 5726 - DECODE(expression, search, result [, search, result] ... [, default]) 5727 5728 The second variant will always be parsed into a CASE expression. Note that NULL 5729 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5730 instead of relying on pattern matching. 
5731 """ 5732 args = self._parse_csv(self._parse_assignment) 5733 5734 if len(args) < 3: 5735 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5736 5737 expression, *expressions = args 5738 if not expression: 5739 return None 5740 5741 ifs = [] 5742 for search, result in zip(expressions[::2], expressions[1::2]): 5743 if not search or not result: 5744 return None 5745 5746 if isinstance(search, exp.Literal): 5747 ifs.append( 5748 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5749 ) 5750 elif isinstance(search, exp.Null): 5751 ifs.append( 5752 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5753 ) 5754 else: 5755 cond = exp.or_( 5756 exp.EQ(this=expression.copy(), expression=search), 5757 exp.and_( 5758 exp.Is(this=expression.copy(), expression=exp.Null()), 5759 exp.Is(this=search.copy(), expression=exp.Null()), 5760 copy=False, 5761 ), 5762 copy=False, 5763 ) 5764 ifs.append(exp.If(this=cond, true=result)) 5765 5766 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5767 5768 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5769 self._match_text_seq("KEY") 5770 key = self._parse_column() 5771 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5772 self._match_text_seq("VALUE") 5773 value = self._parse_bitwise() 5774 5775 if not key and not value: 5776 return None 5777 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5778 5779 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5780 if not this or not self._match_text_seq("FORMAT", "JSON"): 5781 return this 5782 5783 return self.expression(exp.FormatJson, this=this) 5784 5785 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5786 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5787 for value in values: 5788 if self._match_text_seq(value, "ON", on): 5789 return f"{value} ON {on}" 5790 5791 return None 5792 5793 @t.overload 5794 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5795 5796 @t.overload 5797 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
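# Illustrative sketch (an editorial addition, not upstream source): assuming the
# default dialect, the implementation below maps a JSON_OBJECT call to an
# exp.JSONObject node, with key/value pairs from _parse_json_key_value and NULL
# handling from _parse_on_handling, e.g.:
#
#   import sqlglot
#   from sqlglot import exp
#
#   ast = sqlglot.parse_one("SELECT JSON_OBJECT('a': 1 NULL ON NULL)")
#   obj = ast.find(exp.JSONObject)
#   # obj.args["null_handling"] == "NULL ON NULL"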
5798 5799 def _parse_json_object(self, agg=False): 5800 star = self._parse_star() 5801 expressions = ( 5802 [star] 5803 if star 5804 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5805 ) 5806 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5807 5808 unique_keys = None 5809 if self._match_text_seq("WITH", "UNIQUE"): 5810 unique_keys = True 5811 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5812 unique_keys = False 5813 5814 self._match_text_seq("KEYS") 5815 5816 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5817 self._parse_type() 5818 ) 5819 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5820 5821 return self.expression( 5822 exp.JSONObjectAgg if agg else exp.JSONObject, 5823 expressions=expressions, 5824 null_handling=null_handling, 5825 unique_keys=unique_keys, 5826 return_type=return_type, 5827 encoding=encoding, 5828 ) 5829 5830 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5831 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5832 if not self._match_text_seq("NESTED"): 5833 this = self._parse_id_var() 5834 kind = self._parse_types(allow_identifiers=False) 5835 nested = None 5836 else: 5837 this = None 5838 kind = None 5839 nested = True 5840 5841 path = self._match_text_seq("PATH") and self._parse_string() 5842 nested_schema = nested and self._parse_json_schema() 5843 5844 return self.expression( 5845 exp.JSONColumnDef, 5846 this=this, 5847 kind=kind, 5848 path=path, 5849 nested_schema=nested_schema, 5850 ) 5851 5852 def _parse_json_schema(self) -> exp.JSONSchema: 5853 self._match_text_seq("COLUMNS") 5854 return self.expression( 5855 exp.JSONSchema, 5856 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5857 ) 5858 5859 def _parse_json_table(self) -> exp.JSONTable: 5860 this = self._parse_format_json(self._parse_bitwise()) 5861 path = self._match(TokenType.COMMA) and self._parse_string() 5862 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5863 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5864 schema = self._parse_json_schema() 5865 5866 return exp.JSONTable( 5867 this=this, 5868 schema=schema, 5869 path=path, 5870 error_handling=error_handling, 5871 empty_handling=empty_handling, 5872 ) 5873 5874 def _parse_match_against(self) -> exp.MatchAgainst: 5875 expressions = self._parse_csv(self._parse_column) 5876 5877 self._match_text_seq(")", "AGAINST", "(") 5878 5879 this = self._parse_string() 5880 5881 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5882 modifier = "IN NATURAL LANGUAGE MODE" 5883 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5884 modifier = f"{modifier} WITH QUERY EXPANSION" 5885 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5886 modifier = "IN BOOLEAN MODE" 5887 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5888 modifier = "WITH QUERY EXPANSION" 5889 else: 5890 modifier = None 5891 5892 return self.expression( 5893 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5894 ) 5895 5896 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5897 def _parse_open_json(self) -> exp.OpenJSON: 5898 this = self._parse_bitwise() 5899 path = self._match(TokenType.COMMA) and self._parse_string() 5900 5901 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5902 this = self._parse_field(any_token=True) 5903 kind = self._parse_types() 5904 path = 
self._parse_string() 5905 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5906 5907 return self.expression( 5908 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5909 ) 5910 5911 expressions = None 5912 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5913 self._match_l_paren() 5914 expressions = self._parse_csv(_parse_open_json_column_def) 5915 5916 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5917 5918 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5919 args = self._parse_csv(self._parse_bitwise) 5920 5921 if self._match(TokenType.IN): 5922 return self.expression( 5923 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5924 ) 5925 5926 if haystack_first: 5927 haystack = seq_get(args, 0) 5928 needle = seq_get(args, 1) 5929 else: 5930 needle = seq_get(args, 0) 5931 haystack = seq_get(args, 1) 5932 5933 return self.expression( 5934 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5935 ) 5936 5937 def _parse_predict(self) -> exp.Predict: 5938 self._match_text_seq("MODEL") 5939 this = self._parse_table() 5940 5941 self._match(TokenType.COMMA) 5942 self._match_text_seq("TABLE") 5943 5944 return self.expression( 5945 exp.Predict, 5946 this=this, 5947 expression=self._parse_table(), 5948 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5949 ) 5950 5951 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5952 args = self._parse_csv(self._parse_table) 5953 return exp.JoinHint(this=func_name.upper(), expressions=args) 5954 5955 def _parse_substring(self) -> exp.Substring: 5956 # Postgres supports the form: substring(string [from int] [for int]) 5957 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5958 5959 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5960 5961 if self._match(TokenType.FROM): 5962 args.append(self._parse_bitwise()) 5963 if self._match(TokenType.FOR): 5964 if len(args) == 1: 5965 args.append(exp.Literal.number(1)) 5966 args.append(self._parse_bitwise()) 5967 5968 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5969 5970 def _parse_trim(self) -> exp.Trim: 5971 # https://www.w3resource.com/sql/character-functions/trim.php 5972 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5973 5974 position = None 5975 collation = None 5976 expression = None 5977 5978 if self._match_texts(self.TRIM_TYPES): 5979 position = self._prev.text.upper() 5980 5981 this = self._parse_bitwise() 5982 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5983 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5984 expression = self._parse_bitwise() 5985 5986 if invert_order: 5987 this, expression = expression, this 5988 5989 if self._match(TokenType.COLLATE): 5990 collation = self._parse_bitwise() 5991 5992 return self.expression( 5993 exp.Trim, this=this, position=position, expression=expression, collation=collation 5994 ) 5995 5996 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5997 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5998 5999 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6000 return self._parse_window(self._parse_id_var(), alias=True) 6001 6002 def _parse_respect_or_ignore_nulls( 6003 self, this: t.Optional[exp.Expression] 6004 ) -> t.Optional[exp.Expression]: 6005 if self._match_text_seq("IGNORE", "NULLS"): 
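# e.g. FIRST_VALUE(x IGNORE NULLS): the trailing keywords become an
# exp.IgnoreNulls wrapper around the expression parsed so far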
6006 return self.expression(exp.IgnoreNulls, this=this) 6007 if self._match_text_seq("RESPECT", "NULLS"): 6008 return self.expression(exp.RespectNulls, this=this) 6009 return this 6010 6011 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6012 if self._match(TokenType.HAVING): 6013 self._match_texts(("MAX", "MIN")) 6014 max = self._prev.text.upper() != "MIN" 6015 return self.expression( 6016 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6017 ) 6018 6019 return this 6020 6021 def _parse_window( 6022 self, this: t.Optional[exp.Expression], alias: bool = False 6023 ) -> t.Optional[exp.Expression]: 6024 func = this 6025 comments = func.comments if isinstance(func, exp.Expression) else None 6026 6027 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6028 self._match(TokenType.WHERE) 6029 this = self.expression( 6030 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6031 ) 6032 self._match_r_paren() 6033 6034 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6035 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6036 if self._match_text_seq("WITHIN", "GROUP"): 6037 order = self._parse_wrapped(self._parse_order) 6038 this = self.expression(exp.WithinGroup, this=this, expression=order) 6039 6040 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6041 # some dialects choose to implement it and some do not. 6042 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6043 6044 # There is some code above in _parse_lambda that handles 6045 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6046 6047 # The code below handles 6048 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6049 6050 # Oracle allows both formats 6051 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6052 # and Snowflake chose to do the same for familiarity 6053 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6054 if isinstance(this, exp.AggFunc): 6055 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6056 6057 if ignore_respect and ignore_respect is not this: 6058 ignore_respect.replace(ignore_respect.this) 6059 this = self.expression(ignore_respect.__class__, this=this) 6060 6061 this = self._parse_respect_or_ignore_nulls(this) 6062 6063 # bigquery named windows, e.g. SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
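# Illustrative sketch (an editorial addition, not upstream source): with the
# default dialect, both the named window and the OVER reference to it are
# handled by this method, e.g.:
#
#   import sqlglot
#   from sqlglot import exp
#
#   ast = sqlglot.parse_one("SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)")
#   windows = list(ast.find_all(exp.Window))
#   # -> two exp.Window nodes: the `OVER w` reference, and the named window
#   # `w AS (PARTITION BY y)` parsed via _parse_named_window (which calls this
#   # method with alias=True)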
6064 if alias: 6065 over = None 6066 self._match(TokenType.ALIAS) 6067 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6068 return this 6069 else: 6070 over = self._prev.text.upper() 6071 6072 if comments and isinstance(func, exp.Expression): 6073 func.pop_comments() 6074 6075 if not self._match(TokenType.L_PAREN): 6076 return self.expression( 6077 exp.Window, 6078 comments=comments, 6079 this=this, 6080 alias=self._parse_id_var(False), 6081 over=over, 6082 ) 6083 6084 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6085 6086 first = self._match(TokenType.FIRST) 6087 if self._match_text_seq("LAST"): 6088 first = False 6089 6090 partition, order = self._parse_partition_and_order() 6091 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6092 6093 if kind: 6094 self._match(TokenType.BETWEEN) 6095 start = self._parse_window_spec() 6096 self._match(TokenType.AND) 6097 end = self._parse_window_spec() 6098 6099 spec = self.expression( 6100 exp.WindowSpec, 6101 kind=kind, 6102 start=start["value"], 6103 start_side=start["side"], 6104 end=end["value"], 6105 end_side=end["side"], 6106 ) 6107 else: 6108 spec = None 6109 6110 self._match_r_paren() 6111 6112 window = self.expression( 6113 exp.Window, 6114 comments=comments, 6115 this=this, 6116 partition_by=partition, 6117 order=order, 6118 spec=spec, 6119 alias=window_alias, 6120 over=over, 6121 first=first, 6122 ) 6123 6124 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6125 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6126 return self._parse_window(window, alias=alias) 6127 6128 return window 6129 6130 def _parse_partition_and_order( 6131 self, 6132 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6133 return self._parse_partition_by(), self._parse_order() 6134 6135 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6136 self._match(TokenType.BETWEEN) 6137 6138 return { 6139 "value": ( 6140 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6141 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6142 or self._parse_bitwise() 6143 ), 6144 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6145 } 6146 6147 def _parse_alias( 6148 self, this: t.Optional[exp.Expression], explicit: bool = False 6149 ) -> t.Optional[exp.Expression]: 6150 any_token = self._match(TokenType.ALIAS) 6151 comments = self._prev_comments or [] 6152 6153 if explicit and not any_token: 6154 return this 6155 6156 if self._match(TokenType.L_PAREN): 6157 aliases = self.expression( 6158 exp.Aliases, 6159 comments=comments, 6160 this=this, 6161 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6162 ) 6163 self._match_r_paren(aliases) 6164 return aliases 6165 6166 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6167 self.STRING_ALIASES and self._parse_string_as_identifier() 6168 ) 6169 6170 if alias: 6171 comments.extend(alias.pop_comments()) 6172 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6173 column = this.this 6174 6175 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6176 if not this.comments and column and column.comments: 6177 this.comments = column.pop_comments() 6178 6179 return this 6180 6181 def _parse_id_var( 6182 self, 6183 any_token: bool = True, 6184 tokens: t.Optional[t.Collection[TokenType]] = None, 6185 ) -> t.Optional[exp.Expression]: 6186 expression = self._parse_identifier() 6187 if 
not expression and ( 6188 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6189 ): 6190 quoted = self._prev.token_type == TokenType.STRING 6191 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6192 6193 return expression 6194 6195 def _parse_string(self) -> t.Optional[exp.Expression]: 6196 if self._match_set(self.STRING_PARSERS): 6197 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6198 return self._parse_placeholder() 6199 6200 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6201 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6202 6203 def _parse_number(self) -> t.Optional[exp.Expression]: 6204 if self._match_set(self.NUMERIC_PARSERS): 6205 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6206 return self._parse_placeholder() 6207 6208 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6209 if self._match(TokenType.IDENTIFIER): 6210 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6211 return self._parse_placeholder() 6212 6213 def _parse_var( 6214 self, 6215 any_token: bool = False, 6216 tokens: t.Optional[t.Collection[TokenType]] = None, 6217 upper: bool = False, 6218 ) -> t.Optional[exp.Expression]: 6219 if ( 6220 (any_token and self._advance_any()) 6221 or self._match(TokenType.VAR) 6222 or (self._match_set(tokens) if tokens else False) 6223 ): 6224 return self.expression( 6225 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6226 ) 6227 return self._parse_placeholder() 6228 6229 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6230 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6231 self._advance() 6232 return self._prev 6233 return None 6234 6235 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6236 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6237 6238 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6239 return self._parse_primary() or self._parse_var(any_token=True) 6240 6241 def _parse_null(self) -> t.Optional[exp.Expression]: 6242 if self._match_set(self.NULL_TOKENS): 6243 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6244 return self._parse_placeholder() 6245 6246 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6247 if self._match(TokenType.TRUE): 6248 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6249 if self._match(TokenType.FALSE): 6250 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6251 return self._parse_placeholder() 6252 6253 def _parse_star(self) -> t.Optional[exp.Expression]: 6254 if self._match(TokenType.STAR): 6255 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6256 return self._parse_placeholder() 6257 6258 def _parse_parameter(self) -> exp.Parameter: 6259 this = self._parse_identifier() or self._parse_primary_or_var() 6260 return self.expression(exp.Parameter, this=this) 6261 6262 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6263 if self._match_set(self.PLACEHOLDER_PARSERS): 6264 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6265 if placeholder: 6266 return placeholder 6267 self._advance(-1) 6268 return None 6269 6270 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6271 if not self._match_texts(keywords): 6272 return None 6273 if self._match(TokenType.L_PAREN, 
advance=False): 6274 return self._parse_wrapped_csv(self._parse_expression) 6275 6276 expression = self._parse_expression() 6277 return [expression] if expression else None 6278 6279 def _parse_csv( 6280 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6281 ) -> t.List[exp.Expression]: 6282 parse_result = parse_method() 6283 items = [parse_result] if parse_result is not None else [] 6284 6285 while self._match(sep): 6286 self._add_comments(parse_result) 6287 parse_result = parse_method() 6288 if parse_result is not None: 6289 items.append(parse_result) 6290 6291 return items 6292 6293 def _parse_tokens( 6294 self, parse_method: t.Callable, expressions: t.Dict 6295 ) -> t.Optional[exp.Expression]: 6296 this = parse_method() 6297 6298 while self._match_set(expressions): 6299 this = self.expression( 6300 expressions[self._prev.token_type], 6301 this=this, 6302 comments=self._prev_comments, 6303 expression=parse_method(), 6304 ) 6305 6306 return this 6307 6308 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6309 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6310 6311 def _parse_wrapped_csv( 6312 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6313 ) -> t.List[exp.Expression]: 6314 return self._parse_wrapped( 6315 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6316 ) 6317 6318 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6319 wrapped = self._match(TokenType.L_PAREN) 6320 if not wrapped and not optional: 6321 self.raise_error("Expecting (") 6322 parse_result = parse_method() 6323 if wrapped: 6324 self._match_r_paren() 6325 return parse_result 6326 6327 def _parse_expressions(self) -> t.List[exp.Expression]: 6328 return self._parse_csv(self._parse_expression) 6329 6330 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6331 return self._parse_select() or self._parse_set_operations( 6332 self._parse_expression() if alias else self._parse_assignment() 6333 ) 6334 6335 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6336 return self._parse_query_modifiers( 6337 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6338 ) 6339 6340 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6341 this = None 6342 if self._match_texts(self.TRANSACTION_KIND): 6343 this = self._prev.text 6344 6345 self._match_texts(("TRANSACTION", "WORK")) 6346 6347 modes = [] 6348 while True: 6349 mode = [] 6350 while self._match(TokenType.VAR): 6351 mode.append(self._prev.text) 6352 6353 if mode: 6354 modes.append(" ".join(mode)) 6355 if not self._match(TokenType.COMMA): 6356 break 6357 6358 return self.expression(exp.Transaction, this=this, modes=modes) 6359 6360 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6361 chain = None 6362 savepoint = None 6363 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6364 6365 self._match_texts(("TRANSACTION", "WORK")) 6366 6367 if self._match_text_seq("TO"): 6368 self._match_text_seq("SAVEPOINT") 6369 savepoint = self._parse_id_var() 6370 6371 if self._match(TokenType.AND): 6372 chain = not self._match_text_seq("NO") 6373 self._match_text_seq("CHAIN") 6374 6375 if is_rollback: 6376 return self.expression(exp.Rollback, savepoint=savepoint) 6377 6378 return self.expression(exp.Commit, chain=chain) 6379 6380 def _parse_refresh(self) -> exp.Refresh: 6381 self._match(TokenType.TABLE) 6382 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6383 6384 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6385 if not self._match_text_seq("ADD"): 6386 return None 6387 6388 self._match(TokenType.COLUMN) 6389 exists_column = self._parse_exists(not_=True) 6390 expression = self._parse_field_def() 6391 6392 if expression: 6393 expression.set("exists", exists_column) 6394 6395 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6396 if self._match_texts(("FIRST", "AFTER")): 6397 position = self._prev.text 6398 column_position = self.expression( 6399 exp.ColumnPosition, this=self._parse_column(), position=position 6400 ) 6401 expression.set("position", column_position) 6402 6403 return expression 6404 6405 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6406 drop = self._match(TokenType.DROP) and self._parse_drop() 6407 if drop and not isinstance(drop, exp.Command): 6408 drop.set("kind", drop.args.get("kind", "COLUMN")) 6409 return drop 6410 6411 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6412 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6413 return self.expression( 6414 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6415 ) 6416 6417 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6418 index = self._index - 1 6419 6420 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6421 return self._parse_csv( 6422 lambda: self.expression( 6423 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6424 ) 6425 ) 6426 6427 self._retreat(index) 6428 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6429 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6430 6431 if self._match_text_seq("ADD", "COLUMNS"): 6432 schema = self._parse_schema() 6433 if schema: 6434 return [schema] 6435 return [] 6436 6437 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6438 6439 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6440 if self._match_texts(self.ALTER_ALTER_PARSERS): 6441 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6442 6443 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6444 # keyword after ALTER we default to parsing this statement 6445 self._match(TokenType.COLUMN) 6446 column = self._parse_field(any_token=True) 6447 6448 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6449 return self.expression(exp.AlterColumn, this=column, drop=True) 6450 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6451 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6452 if self._match(TokenType.COMMENT): 6453 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6454 if self._match_text_seq("DROP", "NOT", "NULL"): 6455 return self.expression( 6456 exp.AlterColumn, 6457 this=column, 6458 drop=True, 6459 allow_null=True, 6460 ) 6461 if self._match_text_seq("SET", "NOT", "NULL"): 6462 return self.expression( 6463 exp.AlterColumn, 6464 this=column, 6465 allow_null=False, 6466 ) 6467 self._match_text_seq("SET", "DATA") 6468 self._match_text_seq("TYPE") 6469 return self.expression( 6470 exp.AlterColumn, 6471 this=column, 6472 dtype=self._parse_types(), 6473 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6474 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6475 ) 6476 6477 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6478 if self._match_texts(("ALL", "EVEN", "AUTO")): 6479 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6480 6481 self._match_text_seq("KEY", "DISTKEY") 6482 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6483 6484 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6485 if compound: 6486 self._match_text_seq("SORTKEY") 6487 6488 if self._match(TokenType.L_PAREN, advance=False): 6489 return self.expression( 6490 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6491 ) 6492 6493 self._match_texts(("AUTO", "NONE")) 6494 return self.expression( 6495 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6496 ) 6497 6498 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6499 index = self._index - 1 6500 6501 partition_exists = self._parse_exists() 6502 if self._match(TokenType.PARTITION, advance=False): 6503 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6504 6505 self._retreat(index) 6506 return self._parse_csv(self._parse_drop_column) 6507 6508 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6509 if self._match(TokenType.COLUMN): 6510 exists = self._parse_exists() 6511 old_column = self._parse_column() 6512 to = self._match_text_seq("TO") 6513 new_column = self._parse_column() 6514 6515 if old_column is None or to is None or new_column is None: 6516 return None 6517 6518 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6519 6520 self._match_text_seq("TO") 6521 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6522 6523 def _parse_alter_table_set(self) -> exp.AlterSet: 6524 alter_set = self.expression(exp.AlterSet) 6525 6526 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6527 "TABLE", "PROPERTIES" 6528 ): 6529 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6530 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6531 alter_set.set("expressions", [self._parse_assignment()]) 6532 elif self._match_texts(("LOGGED", "UNLOGGED")): 6533 alter_set.set("option", exp.var(self._prev.text.upper())) 6534 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6535 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6536 elif self._match_text_seq("LOCATION"): 6537 alter_set.set("location", self._parse_field()) 6538 elif self._match_text_seq("ACCESS", "METHOD"): 6539 alter_set.set("access_method", self._parse_field()) 6540 elif self._match_text_seq("TABLESPACE"): 6541 alter_set.set("tablespace", self._parse_field()) 6542 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6543 alter_set.set("file_format", [self._parse_field()]) 6544 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6545 alter_set.set("file_format", self._parse_wrapped_options()) 6546 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6547 alter_set.set("copy_options", self._parse_wrapped_options()) 6548 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6549 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6550 else: 6551 if self._match_text_seq("SERDE"): 6552 alter_set.set("serde", self._parse_field()) 6553 6554 alter_set.set("expressions", [self._parse_properties()]) 6555 6556 return 
alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
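Example (a minimal sketch using the base Tokenizer and Parser directly; most callers go through sqlglot.parse or a Dialect, which wire the two together, and the SQL string here is only illustrative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"

    # Record up to 5 errors and raise them together, with 50 characters of context each.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
    expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)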
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
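Usage sketch (assuming the base Tokenizer; statement boundaries are preserved, so each semicolon-separated statement yields one tree):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2  # one syntax tree per statement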
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
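A hedged sketch of targeted parsing; it assumes exp.Select has a parser registered in EXPRESSION_PARSERS (an unregistered type raises TypeError, a registered type that fails to parse raises ParseError):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
    assert isinstance(select, exp.Select)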
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
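A sketch of the WARN path: with ErrorLevel.WARN, errors found while parsing are appended to parser.errors and logged by check_errors (which runs at the end of parsing) instead of being raised. The malformed statement below is only an assumed example of input that records an error:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
    print(len(parser.errors))  # the recorded ParseError objects remain inspectable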
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
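raise_error is primarily useful when extending the parser. A hypothetical subclass method (MyParser, _parse_foo, and the FOO keyword are invented for illustration):

    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_foo(self):
            # Honors error_level: raises immediately under IMMEDIATE,
            # otherwise records the error and keeps parsing.
            if not self._match_text_seq("FOO"):
                self.raise_error("Expected FOO")
            return self._parse_id_var()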
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
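A small sketch: building a node via expression() attaches any pending comments and validates mandatory arguments in one step:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    column = parser.expression(exp.Column, this=exp.to_identifier("a"))
    assert column.name == "a"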
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
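A sketch of validation failing, assuming exp.Not declares "this" as a mandatory argument: under the default ErrorLevel.IMMEDIATE the error is raised, while ErrorLevel.IGNORE skips the check entirely:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    try:
        Parser().validate_expression(exp.Not())
    except ParseError as e:
        print(e)  # reports the missing required argument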