sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
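
    # Usage sketch (illustrative only, not part of the original source):
    #
    #     from sqlglot.errors import ErrorLevel
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT a FROM t WHERE b > 1"
    #     tokens = Tokenizer().tokenize(sql)
    #     ast = Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)[0]
    #
    # In practice, a dialect-specific Tokenizer/Parser pair is usually obtained
    # through sqlglot.dialects.Dialect rather than instantiated directly.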

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
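
    # Dispatch sketch for COLUMN_OPERATORS above (illustrative, not from the
    # original source): for "x::INT", the DCOLON entry builds
    # exp.Cast(this=column, to=type) when STRICT_CAST is set and exp.TryCast
    # otherwise; for "doc -> '$.a'", the ARROW entry builds exp.JSONExtract
    # with the path normalized via dialect.to_json_path.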

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
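
    # FUNCTION_PARSERS below overrides the generic call parser for functions
    # whose arguments are not a plain comma-separated list (illustrative note,
    # not from the original source), e.g.:
    #
    #     CAST(x AS INT)       -> _parse_cast, which must consume the AS keyword
    #     EXTRACT(YEAR FROM d) -> _parse_extract, which must consume FROM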

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False
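
    # Whether a single-argument LOG defaults to the natural logarithm, i.e.
    # LOG(x) builds exp.Ln instead of exp.Log (see build_logarithm above)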
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
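
    # Error-level summary (illustrative note, not from the original source):
    # IGNORE skips expression validation entirely, IMMEDIATE makes raise_error
    # raise on the first problem, while WARN and RAISE accumulate errors that
    # check_errors then logs or raises (capped at max_errors), respectively.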
1591 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1592 solve this by setting & resetting the parser state accordingly 1593 """ 1594 index = self._index 1595 error_level = self.error_level 1596 1597 self.error_level = ErrorLevel.IMMEDIATE 1598 try: 1599 this = parse_method() 1600 except ParseError: 1601 this = None 1602 finally: 1603 if not this or retreat: 1604 self._retreat(index) 1605 self.error_level = error_level 1606 1607 return this 1608 1609 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1610 start = self._prev 1611 exists = self._parse_exists() if allow_exists else None 1612 1613 self._match(TokenType.ON) 1614 1615 materialized = self._match_text_seq("MATERIALIZED") 1616 kind = self._match_set(self.CREATABLES) and self._prev 1617 if not kind: 1618 return self._parse_as_command(start) 1619 1620 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1621 this = self._parse_user_defined_function(kind=kind.token_type) 1622 elif kind.token_type == TokenType.TABLE: 1623 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1624 elif kind.token_type == TokenType.COLUMN: 1625 this = self._parse_column() 1626 else: 1627 this = self._parse_id_var() 1628 1629 self._match(TokenType.IS) 1630 1631 return self.expression( 1632 exp.Comment, 1633 this=this, 1634 kind=kind.text, 1635 expression=self._parse_string(), 1636 exists=exists, 1637 materialized=materialized, 1638 ) 1639 1640 def _parse_to_table( 1641 self, 1642 ) -> exp.ToTableProperty: 1643 table = self._parse_table_parts(schema=True) 1644 return self.expression(exp.ToTableProperty, this=table) 1645 1646 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1647 def _parse_ttl(self) -> exp.Expression: 1648 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1649 this = self._parse_bitwise() 1650 1651 if self._match_text_seq("DELETE"): 1652 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1653 if self._match_text_seq("RECOMPRESS"): 1654 return self.expression( 1655 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1656 ) 1657 if self._match_text_seq("TO", "DISK"): 1658 return self.expression( 1659 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1660 ) 1661 if self._match_text_seq("TO", "VOLUME"): 1662 return self.expression( 1663 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1664 ) 1665 1666 return this 1667 1668 expressions = self._parse_csv(_parse_ttl_action) 1669 where = self._parse_where() 1670 group = self._parse_group() 1671 1672 aggregates = None 1673 if group and self._match(TokenType.SET): 1674 aggregates = self._parse_csv(self._parse_set_item) 1675 1676 return self.expression( 1677 exp.MergeTreeTTL, 1678 expressions=expressions, 1679 where=where, 1680 group=group, 1681 aggregates=aggregates, 1682 ) 1683 1684 def _parse_statement(self) -> t.Optional[exp.Expression]: 1685 if self._curr is None: 1686 return None 1687 1688 if self._match_set(self.STATEMENT_PARSERS): 1689 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1690 1691 if self._match_set(self.dialect.tokenizer.COMMANDS): 1692 return self._parse_command() 1693 1694 expression = self._parse_expression() 1695 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1696 return self._parse_query_modifiers(expression) 1697 1698 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1699 start = 
self._prev 1700 temporary = self._match(TokenType.TEMPORARY) 1701 materialized = self._match_text_seq("MATERIALIZED") 1702 1703 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1704 if not kind: 1705 return self._parse_as_command(start) 1706 1707 concurrently = self._match_text_seq("CONCURRENTLY") 1708 if_exists = exists or self._parse_exists() 1709 table = self._parse_table_parts( 1710 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1711 ) 1712 1713 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1714 1715 if self._match(TokenType.L_PAREN, advance=False): 1716 expressions = self._parse_wrapped_csv(self._parse_types) 1717 else: 1718 expressions = None 1719 1720 return self.expression( 1721 exp.Drop, 1722 comments=start.comments, 1723 exists=if_exists, 1724 this=table, 1725 expressions=expressions, 1726 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1727 temporary=temporary, 1728 materialized=materialized, 1729 cascade=self._match_text_seq("CASCADE"), 1730 constraints=self._match_text_seq("CONSTRAINTS"), 1731 purge=self._match_text_seq("PURGE"), 1732 cluster=cluster, 1733 concurrently=concurrently, 1734 ) 1735 1736 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1737 return ( 1738 self._match_text_seq("IF") 1739 and (not not_ or self._match(TokenType.NOT)) 1740 and self._match(TokenType.EXISTS) 1741 ) 1742 1743 def _parse_create(self) -> exp.Create | exp.Command: 1744 # Note: this can't be None because we've matched a statement parser 1745 start = self._prev 1746 comments = self._prev_comments 1747 1748 replace = ( 1749 start.token_type == TokenType.REPLACE 1750 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1751 or self._match_pair(TokenType.OR, TokenType.ALTER) 1752 ) 1753 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1754 1755 unique = self._match(TokenType.UNIQUE) 1756 1757 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1758 clustered = True 1759 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1760 "COLUMNSTORE" 1761 ): 1762 clustered = False 1763 else: 1764 clustered = None 1765 1766 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1767 self._advance() 1768 1769 properties = None 1770 create_token = self._match_set(self.CREATABLES) and self._prev 1771 1772 if not create_token: 1773 # exp.Properties.Location.POST_CREATE 1774 properties = self._parse_properties() 1775 create_token = self._match_set(self.CREATABLES) and self._prev 1776 1777 if not properties or not create_token: 1778 return self._parse_as_command(start) 1779 1780 concurrently = self._match_text_seq("CONCURRENTLY") 1781 exists = self._parse_exists(not_=True) 1782 this = None 1783 expression: t.Optional[exp.Expression] = None 1784 indexes = None 1785 no_schema_binding = None 1786 begin = None 1787 end = None 1788 clone = None 1789 1790 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1791 nonlocal properties 1792 if properties and temp_props: 1793 properties.expressions.extend(temp_props.expressions) 1794 elif temp_props: 1795 properties = temp_props 1796 1797 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1798 this = self._parse_user_defined_function(kind=create_token.token_type) 1799 1800 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1801 extend_props(self._parse_properties()) 1802 1803 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1804 
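# Illustrative note (added): TokenType.ALIAS is sqlglot's token for the AS keyword, so the
# line above captures function bodies written as heredoc/dollar-quoted strings, e.g.
# (assuming a Postgres-style dialect):
#
#   CREATE FUNCTION add(a INT, b INT) RETURNS INT AS $$ SELECT a + b $$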
extend_props(self._parse_properties()) 1805 1806 if not expression: 1807 if self._match(TokenType.COMMAND): 1808 expression = self._parse_as_command(self._prev) 1809 else: 1810 begin = self._match(TokenType.BEGIN) 1811 return_ = self._match_text_seq("RETURN") 1812 1813 if self._match(TokenType.STRING, advance=False): 1814 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1815 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1816 expression = self._parse_string() 1817 extend_props(self._parse_properties()) 1818 else: 1819 expression = self._parse_statement() 1820 1821 end = self._match_text_seq("END") 1822 1823 if return_: 1824 expression = self.expression(exp.Return, this=expression) 1825 elif create_token.token_type == TokenType.INDEX: 1826 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1827 if not self._match(TokenType.ON): 1828 index = self._parse_id_var() 1829 anonymous = False 1830 else: 1831 index = None 1832 anonymous = True 1833 1834 this = self._parse_index(index=index, anonymous=anonymous) 1835 elif create_token.token_type in self.DB_CREATABLES: 1836 table_parts = self._parse_table_parts( 1837 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1838 ) 1839 1840 # exp.Properties.Location.POST_NAME 1841 self._match(TokenType.COMMA) 1842 extend_props(self._parse_properties(before=True)) 1843 1844 this = self._parse_schema(this=table_parts) 1845 1846 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1847 extend_props(self._parse_properties()) 1848 1849 self._match(TokenType.ALIAS) 1850 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1851 # exp.Properties.Location.POST_ALIAS 1852 extend_props(self._parse_properties()) 1853 1854 if create_token.token_type == TokenType.SEQUENCE: 1855 expression = self._parse_types() 1856 extend_props(self._parse_properties()) 1857 else: 1858 expression = self._parse_ddl_select() 1859 1860 if create_token.token_type == TokenType.TABLE: 1861 # exp.Properties.Location.POST_EXPRESSION 1862 extend_props(self._parse_properties()) 1863 1864 indexes = [] 1865 while True: 1866 index = self._parse_index() 1867 1868 # exp.Properties.Location.POST_INDEX 1869 extend_props(self._parse_properties()) 1870 if not index: 1871 break 1872 else: 1873 self._match(TokenType.COMMA) 1874 indexes.append(index) 1875 elif create_token.token_type == TokenType.VIEW: 1876 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1877 no_schema_binding = True 1878 1879 shallow = self._match_text_seq("SHALLOW") 1880 1881 if self._match_texts(self.CLONE_KEYWORDS): 1882 copy = self._prev.text.lower() == "copy" 1883 clone = self.expression( 1884 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1885 ) 1886 1887 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1888 return self._parse_as_command(start) 1889 1890 create_kind_text = create_token.text.upper() 1891 return self.expression( 1892 exp.Create, 1893 comments=comments, 1894 this=this, 1895 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1896 replace=replace, 1897 refresh=refresh, 1898 unique=unique, 1899 expression=expression, 1900 exists=exists, 1901 properties=properties, 1902 indexes=indexes, 1903 no_schema_binding=no_schema_binding, 1904 begin=begin, 1905 end=end, 1906 clone=clone, 1907 concurrently=concurrently, 1908 clustered=clustered, 1909 ) 1910 1911 def
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1912 seq = exp.SequenceProperties() 1913 1914 options = [] 1915 index = self._index 1916 1917 while self._curr: 1918 self._match(TokenType.COMMA) 1919 if self._match_text_seq("INCREMENT"): 1920 self._match_text_seq("BY") 1921 self._match_text_seq("=") 1922 seq.set("increment", self._parse_term()) 1923 elif self._match_text_seq("MINVALUE"): 1924 seq.set("minvalue", self._parse_term()) 1925 elif self._match_text_seq("MAXVALUE"): 1926 seq.set("maxvalue", self._parse_term()) 1927 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1928 self._match_text_seq("=") 1929 seq.set("start", self._parse_term()) 1930 elif self._match_text_seq("CACHE"): 1931 # T-SQL allows empty CACHE which is initialized dynamically 1932 seq.set("cache", self._parse_number() or True) 1933 elif self._match_text_seq("OWNED", "BY"): 1934 # "OWNED BY NONE" is the default 1935 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1936 else: 1937 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1938 if opt: 1939 options.append(opt) 1940 else: 1941 break 1942 1943 seq.set("options", options if options else None) 1944 return None if self._index == index else seq 1945 1946 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1947 # only used for teradata currently 1948 self._match(TokenType.COMMA) 1949 1950 kwargs = { 1951 "no": self._match_text_seq("NO"), 1952 "dual": self._match_text_seq("DUAL"), 1953 "before": self._match_text_seq("BEFORE"), 1954 "default": self._match_text_seq("DEFAULT"), 1955 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1956 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1957 "after": self._match_text_seq("AFTER"), 1958 "minimum": self._match_texts(("MIN", "MINIMUM")), 1959 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1960 } 1961 1962 if self._match_texts(self.PROPERTY_PARSERS): 1963 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1964 try: 1965 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1966 except TypeError: 1967 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1968 1969 return None 1970 1971 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1972 return self._parse_wrapped_csv(self._parse_property) 1973 1974 def _parse_property(self) -> t.Optional[exp.Expression]: 1975 if self._match_texts(self.PROPERTY_PARSERS): 1976 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1977 1978 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1979 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1980 1981 if self._match_text_seq("COMPOUND", "SORTKEY"): 1982 return self._parse_sortkey(compound=True) 1983 1984 if self._match_text_seq("SQL", "SECURITY"): 1985 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1986 1987 index = self._index 1988 key = self._parse_column() 1989 1990 if not self._match(TokenType.EQ): 1991 self._retreat(index) 1992 return self._parse_sequence_properties() 1993 1994 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1995 if isinstance(key, exp.Column): 1996 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1997 1998 value = self._parse_bitwise() or self._parse_var(any_token=True) 1999 2000 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2001 if 
isinstance(value, exp.Column): 2002 value = exp.var(value.name) 2003 2004 return self.expression(exp.Property, this=key, value=value) 2005 2006 def _parse_stored(self) -> exp.FileFormatProperty: 2007 self._match(TokenType.ALIAS) 2008 2009 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2010 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2011 2012 return self.expression( 2013 exp.FileFormatProperty, 2014 this=( 2015 self.expression( 2016 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2017 ) 2018 if input_format or output_format 2019 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2020 ), 2021 ) 2022 2023 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2024 field = self._parse_field() 2025 if isinstance(field, exp.Identifier) and not field.quoted: 2026 field = exp.var(field) 2027 2028 return field 2029 2030 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2031 self._match(TokenType.EQ) 2032 self._match(TokenType.ALIAS) 2033 2034 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2035 2036 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2037 properties = [] 2038 while True: 2039 if before: 2040 prop = self._parse_property_before() 2041 else: 2042 prop = self._parse_property() 2043 if not prop: 2044 break 2045 for p in ensure_list(prop): 2046 properties.append(p) 2047 2048 if properties: 2049 return self.expression(exp.Properties, expressions=properties) 2050 2051 return None 2052 2053 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2054 return self.expression( 2055 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2056 ) 2057 2058 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2059 if self._match_texts(("DEFINER", "INVOKER")): 2060 security_specifier = self._prev.text.upper() 2061 return self.expression(exp.SecurityProperty, this=security_specifier) 2062 return None 2063 2064 def _parse_settings_property(self) -> exp.SettingsProperty: 2065 return self.expression( 2066 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2067 ) 2068 2069 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2070 if self._index >= 2: 2071 pre_volatile_token = self._tokens[self._index - 2] 2072 else: 2073 pre_volatile_token = None 2074 2075 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2076 return exp.VolatileProperty() 2077 2078 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2079 2080 def _parse_retention_period(self) -> exp.Var: 2081 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2082 number = self._parse_number() 2083 number_str = f"{number} " if number else "" 2084 unit = self._parse_var(any_token=True) 2085 return exp.var(f"{number_str}{unit}") 2086 2087 def _parse_system_versioning_property( 2088 self, with_: bool = False 2089 ) -> exp.WithSystemVersioningProperty: 2090 self._match(TokenType.EQ) 2091 prop = self.expression( 2092 exp.WithSystemVersioningProperty, 2093 **{ # type: ignore 2094 "on": True, 2095 "with": with_, 2096 }, 2097 ) 2098 2099 if self._match_text_seq("OFF"): 2100 prop.set("on", False) 2101 return prop 2102 2103 self._match(TokenType.ON) 2104 if self._match(TokenType.L_PAREN): 2105 while self._curr and not 
self._match(TokenType.R_PAREN): 2106 if self._match_text_seq("HISTORY_TABLE", "="): 2107 prop.set("this", self._parse_table_parts()) 2108 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2109 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2110 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2111 prop.set("retention_period", self._parse_retention_period()) 2112 2113 self._match(TokenType.COMMA) 2114 2115 return prop 2116 2117 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2118 self._match(TokenType.EQ) 2119 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2120 prop = self.expression(exp.DataDeletionProperty, on=on) 2121 2122 if self._match(TokenType.L_PAREN): 2123 while self._curr and not self._match(TokenType.R_PAREN): 2124 if self._match_text_seq("FILTER_COLUMN", "="): 2125 prop.set("filter_column", self._parse_column()) 2126 elif self._match_text_seq("RETENTION_PERIOD", "="): 2127 prop.set("retention_period", self._parse_retention_period()) 2128 2129 self._match(TokenType.COMMA) 2130 2131 return prop 2132 2133 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2134 kind = "HASH" 2135 expressions: t.Optional[t.List[exp.Expression]] = None 2136 if self._match_text_seq("BY", "HASH"): 2137 expressions = self._parse_wrapped_csv(self._parse_id_var) 2138 elif self._match_text_seq("BY", "RANDOM"): 2139 kind = "RANDOM" 2140 2141 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2142 buckets: t.Optional[exp.Expression] = None 2143 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2144 buckets = self._parse_number() 2145 2146 return self.expression( 2147 exp.DistributedByProperty, 2148 expressions=expressions, 2149 kind=kind, 2150 buckets=buckets, 2151 order=self._parse_order(), 2152 ) 2153 2154 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2155 self._match_text_seq("KEY") 2156 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2157 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2158 2159 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2160 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2161 prop = self._parse_system_versioning_property(with_=True) 2162 self._match_r_paren() 2163 return prop 2164 2165 if self._match(TokenType.L_PAREN, advance=False): 2166 return self._parse_wrapped_properties() 2167 2168 if self._match_text_seq("JOURNAL"): 2169 return self._parse_withjournaltable() 2170 2171 if self._match_texts(self.VIEW_ATTRIBUTES): 2172 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2173 2174 if self._match_text_seq("DATA"): 2175 return self._parse_withdata(no=False) 2176 elif self._match_text_seq("NO", "DATA"): 2177 return self._parse_withdata(no=True) 2178 2179 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2180 return self._parse_serde_properties(with_=True) 2181 2182 if self._match(TokenType.SCHEMA): 2183 return self.expression( 2184 exp.WithSchemaBindingProperty, 2185 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2186 ) 2187 2188 if not self._next: 2189 return None 2190 2191 return self._parse_withisolatedloading() 2192 2193 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2194 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2195 self._match(TokenType.EQ) 2196 2197 user = self._parse_id_var() 2198 self._match(TokenType.PARAMETER) 2199 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2200 2201 if not user or not host: 2202 return None 2203 2204 return exp.DefinerProperty(this=f"{user}@{host}") 2205 2206 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2207 self._match(TokenType.TABLE) 2208 self._match(TokenType.EQ) 2209 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2210 2211 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2212 return self.expression(exp.LogProperty, no=no) 2213 2214 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2215 return self.expression(exp.JournalProperty, **kwargs) 2216 2217 def _parse_checksum(self) -> exp.ChecksumProperty: 2218 self._match(TokenType.EQ) 2219 2220 on = None 2221 if self._match(TokenType.ON): 2222 on = True 2223 elif self._match_text_seq("OFF"): 2224 on = False 2225 2226 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2227 2228 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2229 return self.expression( 2230 exp.Cluster, 2231 expressions=( 2232 self._parse_wrapped_csv(self._parse_ordered) 2233 if wrapped 2234 else self._parse_csv(self._parse_ordered) 2235 ), 2236 ) 2237 2238 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2239 self._match_text_seq("BY") 2240 2241 self._match_l_paren() 2242 expressions = self._parse_csv(self._parse_column) 2243 self._match_r_paren() 2244 2245 if self._match_text_seq("SORTED", "BY"): 2246 self._match_l_paren() 2247 sorted_by = self._parse_csv(self._parse_ordered) 2248 self._match_r_paren() 2249 else: 2250 sorted_by = None 2251 2252 self._match(TokenType.INTO) 2253 buckets = self._parse_number() 2254 self._match_text_seq("BUCKETS") 2255 2256 return self.expression( 2257 exp.ClusteredByProperty, 2258 expressions=expressions, 2259 sorted_by=sorted_by, 2260 buckets=buckets, 2261 ) 2262 2263 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2264 if not self._match_text_seq("GRANTS"): 2265 self._retreat(self._index - 1) 2266 return None 2267 2268 return self.expression(exp.CopyGrantsProperty) 2269 2270 def _parse_freespace(self) -> exp.FreespaceProperty: 2271 self._match(TokenType.EQ) 2272 return self.expression( 2273 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2274 ) 2275 2276 def _parse_mergeblockratio( 2277 self, no: bool = False, default: bool = False 2278 ) -> exp.MergeBlockRatioProperty: 2279 if self._match(TokenType.EQ): 2280 return self.expression( 2281 exp.MergeBlockRatioProperty, 2282 this=self._parse_number(), 2283 percent=self._match(TokenType.PERCENT), 2284 ) 2285 2286 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2287 2288 def _parse_datablocksize( 2289 self, 2290 default: t.Optional[bool] = None, 2291 minimum: t.Optional[bool] = None, 2292 maximum: t.Optional[bool] = None, 2293 ) -> exp.DataBlocksizeProperty: 2294 self._match(TokenType.EQ) 2295 size = self._parse_number() 2296 2297 units = None 2298 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2299 units = self._prev.text 2300 2301 return self.expression( 2302 exp.DataBlocksizeProperty, 2303 size=size, 2304 units=units, 2305 default=default, 2306 minimum=minimum, 2307 maximum=maximum, 2308 ) 2309 2310 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2311 self._match(TokenType.EQ) 2312 always = self._match_text_seq("ALWAYS") 2313 manual = self._match_text_seq("MANUAL") 2314 never = 
self._match_text_seq("NEVER") 2315 default = self._match_text_seq("DEFAULT") 2316 2317 autotemp = None 2318 if self._match_text_seq("AUTOTEMP"): 2319 autotemp = self._parse_schema() 2320 2321 return self.expression( 2322 exp.BlockCompressionProperty, 2323 always=always, 2324 manual=manual, 2325 never=never, 2326 default=default, 2327 autotemp=autotemp, 2328 ) 2329 2330 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2331 index = self._index 2332 no = self._match_text_seq("NO") 2333 concurrent = self._match_text_seq("CONCURRENT") 2334 2335 if not self._match_text_seq("ISOLATED", "LOADING"): 2336 self._retreat(index) 2337 return None 2338 2339 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2340 return self.expression( 2341 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2342 ) 2343 2344 def _parse_locking(self) -> exp.LockingProperty: 2345 if self._match(TokenType.TABLE): 2346 kind = "TABLE" 2347 elif self._match(TokenType.VIEW): 2348 kind = "VIEW" 2349 elif self._match(TokenType.ROW): 2350 kind = "ROW" 2351 elif self._match_text_seq("DATABASE"): 2352 kind = "DATABASE" 2353 else: 2354 kind = None 2355 2356 if kind in ("DATABASE", "TABLE", "VIEW"): 2357 this = self._parse_table_parts() 2358 else: 2359 this = None 2360 2361 if self._match(TokenType.FOR): 2362 for_or_in = "FOR" 2363 elif self._match(TokenType.IN): 2364 for_or_in = "IN" 2365 else: 2366 for_or_in = None 2367 2368 if self._match_text_seq("ACCESS"): 2369 lock_type = "ACCESS" 2370 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2371 lock_type = "EXCLUSIVE" 2372 elif self._match_text_seq("SHARE"): 2373 lock_type = "SHARE" 2374 elif self._match_text_seq("READ"): 2375 lock_type = "READ" 2376 elif self._match_text_seq("WRITE"): 2377 lock_type = "WRITE" 2378 elif self._match_text_seq("CHECKSUM"): 2379 lock_type = "CHECKSUM" 2380 else: 2381 lock_type = None 2382 2383 override = self._match_text_seq("OVERRIDE") 2384 2385 return self.expression( 2386 exp.LockingProperty, 2387 this=this, 2388 kind=kind, 2389 for_or_in=for_or_in, 2390 lock_type=lock_type, 2391 override=override, 2392 ) 2393 2394 def _parse_partition_by(self) -> t.List[exp.Expression]: 2395 if self._match(TokenType.PARTITION_BY): 2396 return self._parse_csv(self._parse_assignment) 2397 return [] 2398 2399 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2400 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2401 if self._match_text_seq("MINVALUE"): 2402 return exp.var("MINVALUE") 2403 if self._match_text_seq("MAXVALUE"): 2404 return exp.var("MAXVALUE") 2405 return self._parse_bitwise() 2406 2407 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2408 expression = None 2409 from_expressions = None 2410 to_expressions = None 2411 2412 if self._match(TokenType.IN): 2413 this = self._parse_wrapped_csv(self._parse_bitwise) 2414 elif self._match(TokenType.FROM): 2415 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2416 self._match_text_seq("TO") 2417 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2418 elif self._match_text_seq("WITH", "(", "MODULUS"): 2419 this = self._parse_number() 2420 self._match_text_seq(",", "REMAINDER") 2421 expression = self._parse_number() 2422 self._match_r_paren() 2423 else: 2424 self.raise_error("Failed to parse partition bound spec.") 2425 2426 return self.expression( 2427 exp.PartitionBoundSpec, 2428 this=this, 2429 expression=expression, 2430 
from_expressions=from_expressions, 2431 to_expressions=to_expressions, 2432 ) 2433 2434 # https://www.postgresql.org/docs/current/sql-createtable.html 2435 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2436 if not self._match_text_seq("OF"): 2437 self._retreat(self._index - 1) 2438 return None 2439 2440 this = self._parse_table(schema=True) 2441 2442 if self._match(TokenType.DEFAULT): 2443 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2444 elif self._match_text_seq("FOR", "VALUES"): 2445 expression = self._parse_partition_bound_spec() 2446 else: 2447 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2448 2449 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2450 2451 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2452 self._match(TokenType.EQ) 2453 return self.expression( 2454 exp.PartitionedByProperty, 2455 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2456 ) 2457 2458 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2459 if self._match_text_seq("AND", "STATISTICS"): 2460 statistics = True 2461 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2462 statistics = False 2463 else: 2464 statistics = None 2465 2466 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2467 2468 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2469 if self._match_text_seq("SQL"): 2470 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2471 return None 2472 2473 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2474 if self._match_text_seq("SQL", "DATA"): 2475 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2476 return None 2477 2478 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2479 if self._match_text_seq("PRIMARY", "INDEX"): 2480 return exp.NoPrimaryIndexProperty() 2481 if self._match_text_seq("SQL"): 2482 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2483 return None 2484 2485 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2486 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2487 return exp.OnCommitProperty() 2488 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2489 return exp.OnCommitProperty(delete=True) 2490 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2491 2492 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2493 if self._match_text_seq("SQL", "DATA"): 2494 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2495 return None 2496 2497 def _parse_distkey(self) -> exp.DistKeyProperty: 2498 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2499 2500 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2501 table = self._parse_table(schema=True) 2502 2503 options = [] 2504 while self._match_texts(("INCLUDING", "EXCLUDING")): 2505 this = self._prev.text.upper() 2506 2507 id_var = self._parse_id_var() 2508 if not id_var: 2509 return None 2510 2511 options.append( 2512 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2513 ) 2514 2515 return self.expression(exp.LikeProperty, this=table, expressions=options) 2516 2517 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2518 return self.expression( 2519 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2520 ) 2521 2522 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2523 self._match(TokenType.EQ) 2524 return self.expression( 2525 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2526 ) 2527 2528 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2529 self._match_text_seq("WITH", "CONNECTION") 2530 return self.expression( 2531 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2532 ) 2533 2534 def _parse_returns(self) -> exp.ReturnsProperty: 2535 value: t.Optional[exp.Expression] 2536 null = None 2537 is_table = self._match(TokenType.TABLE) 2538 2539 if is_table: 2540 if self._match(TokenType.LT): 2541 value = self.expression( 2542 exp.Schema, 2543 this="TABLE", 2544 expressions=self._parse_csv(self._parse_struct_types), 2545 ) 2546 if not self._match(TokenType.GT): 2547 self.raise_error("Expecting >") 2548 else: 2549 value = self._parse_schema(exp.var("TABLE")) 2550 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2551 null = True 2552 value = None 2553 else: 2554 value = self._parse_types() 2555 2556 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2557 2558 def _parse_describe(self) -> exp.Describe: 2559 kind = self._match_set(self.CREATABLES) and self._prev.text 2560 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2561 if self._match(TokenType.DOT): 2562 style = None 2563 self._retreat(self._index - 2) 2564 this = self._parse_table(schema=True) 2565 properties = self._parse_properties() 2566 expressions = properties.expressions if properties else None 2567 partition = self._parse_partition() 2568 return self.expression( 2569 exp.Describe, 2570 this=this, 2571 style=style, 2572 kind=kind, 2573 expressions=expressions, 2574 partition=partition, 2575 ) 2576 2577 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2578 kind = self._prev.text.upper() 2579 expressions = [] 2580 2581 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2582 if self._match(TokenType.WHEN): 2583 expression = self._parse_disjunction() 2584 self._match(TokenType.THEN) 2585 else: 2586 expression = None 2587 2588 else_ = self._match(TokenType.ELSE) 2589 2590 if not self._match(TokenType.INTO): 2591 return None 2592 2593 return self.expression( 2594 exp.ConditionalInsert, 2595 this=self.expression( 2596 exp.Insert, 2597 this=self._parse_table(schema=True), 2598 expression=self._parse_derived_table_values(), 2599 ), 2600 expression=expression, 2601 else_=else_, 2602 ) 2603 2604 expression = parse_conditional_insert() 2605 while expression is not None: 2606 expressions.append(expression) 2607 expression = parse_conditional_insert() 2608 2609 return self.expression( 2610 exp.MultitableInserts, 2611 kind=kind, 2612 comments=comments, 2613 expressions=expressions, 2614 source=self._parse_table(), 2615 ) 2616 2617 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2618 comments = ensure_list(self._prev_comments) 2619 hint = self._parse_hint() 2620 overwrite = self._match(TokenType.OVERWRITE) 2621 ignore = self._match(TokenType.IGNORE) 2622 local = self._match_text_seq("LOCAL") 2623 alternative = None 2624 is_function = None 2625 2626 if self._match_text_seq("DIRECTORY"): 2627 this: t.Optional[exp.Expression] = self.expression( 2628 exp.Directory, 2629 this=self._parse_var_or_string(), 2630 local=local, 2631 row_format=self._parse_row_format(match_row=True), 2632 ) 
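# Illustrative note (added, not part of the original source): the branch above covers
# Hive-style INSERT OVERWRITE [LOCAL] DIRECTORY '/path' ... statements, while the else
# branch below handles ordinary inserts. A minimal sketch via the public API:
#
#   import sqlglot
#   tree = sqlglot.parse_one("INSERT INTO t (a, b) SELECT 1, 2")
#   assert isinstance(tree, sqlglot.exp.Insert)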
2633 else: 2634 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2635 comments += ensure_list(self._prev_comments) 2636 return self._parse_multitable_inserts(comments) 2637 2638 if self._match(TokenType.OR): 2639 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2640 2641 self._match(TokenType.INTO) 2642 comments += ensure_list(self._prev_comments) 2643 self._match(TokenType.TABLE) 2644 is_function = self._match(TokenType.FUNCTION) 2645 2646 this = ( 2647 self._parse_table(schema=True, parse_partition=True) 2648 if not is_function 2649 else self._parse_function() 2650 ) 2651 2652 returning = self._parse_returning() 2653 2654 return self.expression( 2655 exp.Insert, 2656 comments=comments, 2657 hint=hint, 2658 is_function=is_function, 2659 this=this, 2660 stored=self._match_text_seq("STORED") and self._parse_stored(), 2661 by_name=self._match_text_seq("BY", "NAME"), 2662 exists=self._parse_exists(), 2663 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2664 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2665 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2666 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2667 conflict=self._parse_on_conflict(), 2668 returning=returning or self._parse_returning(), 2669 overwrite=overwrite, 2670 alternative=alternative, 2671 ignore=ignore, 2672 source=self._match(TokenType.TABLE) and self._parse_table(), 2673 ) 2674 2675 def _parse_kill(self) -> exp.Kill: 2676 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2677 2678 return self.expression( 2679 exp.Kill, 2680 this=self._parse_primary(), 2681 kind=kind, 2682 ) 2683 2684 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2685 conflict = self._match_text_seq("ON", "CONFLICT") 2686 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2687 2688 if not conflict and not duplicate: 2689 return None 2690 2691 conflict_keys = None 2692 constraint = None 2693 2694 if conflict: 2695 if self._match_text_seq("ON", "CONSTRAINT"): 2696 constraint = self._parse_id_var() 2697 elif self._match(TokenType.L_PAREN): 2698 conflict_keys = self._parse_csv(self._parse_id_var) 2699 self._match_r_paren() 2700 2701 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2702 if self._prev.token_type == TokenType.UPDATE: 2703 self._match(TokenType.SET) 2704 expressions = self._parse_csv(self._parse_equality) 2705 else: 2706 expressions = None 2707 2708 return self.expression( 2709 exp.OnConflict, 2710 duplicate=duplicate, 2711 expressions=expressions, 2712 action=action, 2713 conflict_keys=conflict_keys, 2714 constraint=constraint, 2715 ) 2716 2717 def _parse_returning(self) -> t.Optional[exp.Returning]: 2718 if not self._match(TokenType.RETURNING): 2719 return None 2720 return self.expression( 2721 exp.Returning, 2722 expressions=self._parse_csv(self._parse_expression), 2723 into=self._match(TokenType.INTO) and self._parse_table_part(), 2724 ) 2725 2726 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2727 if not self._match(TokenType.FORMAT): 2728 return None 2729 return self._parse_row_format() 2730 2731 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2732 index = self._index 2733 with_ = with_ or self._match_text_seq("WITH") 2734 2735 if not self._match(TokenType.SERDE_PROPERTIES): 2736 self._retreat(index) 2737 return 
None 2738 return self.expression( 2739 exp.SerdeProperties, 2740 **{ # type: ignore 2741 "expressions": self._parse_wrapped_properties(), 2742 "with": with_, 2743 }, 2744 ) 2745 2746 def _parse_row_format( 2747 self, match_row: bool = False 2748 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2749 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2750 return None 2751 2752 if self._match_text_seq("SERDE"): 2753 this = self._parse_string() 2754 2755 serde_properties = self._parse_serde_properties() 2756 2757 return self.expression( 2758 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2759 ) 2760 2761 self._match_text_seq("DELIMITED") 2762 2763 kwargs = {} 2764 2765 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2766 kwargs["fields"] = self._parse_string() 2767 if self._match_text_seq("ESCAPED", "BY"): 2768 kwargs["escaped"] = self._parse_string() 2769 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2770 kwargs["collection_items"] = self._parse_string() 2771 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2772 kwargs["map_keys"] = self._parse_string() 2773 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2774 kwargs["lines"] = self._parse_string() 2775 if self._match_text_seq("NULL", "DEFINED", "AS"): 2776 kwargs["null"] = self._parse_string() 2777 2778 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2779 2780 def _parse_load(self) -> exp.LoadData | exp.Command: 2781 if self._match_text_seq("DATA"): 2782 local = self._match_text_seq("LOCAL") 2783 self._match_text_seq("INPATH") 2784 inpath = self._parse_string() 2785 overwrite = self._match(TokenType.OVERWRITE) 2786 self._match_pair(TokenType.INTO, TokenType.TABLE) 2787 2788 return self.expression( 2789 exp.LoadData, 2790 this=self._parse_table(schema=True), 2791 local=local, 2792 overwrite=overwrite, 2793 inpath=inpath, 2794 partition=self._parse_partition(), 2795 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2796 serde=self._match_text_seq("SERDE") and self._parse_string(), 2797 ) 2798 return self._parse_as_command(self._prev) 2799 2800 def _parse_delete(self) -> exp.Delete: 2801 # This handles MySQL's "Multiple-Table Syntax" 2802 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2803 tables = None 2804 comments = self._prev_comments 2805 if not self._match(TokenType.FROM, advance=False): 2806 tables = self._parse_csv(self._parse_table) or None 2807 2808 returning = self._parse_returning() 2809 2810 return self.expression( 2811 exp.Delete, 2812 comments=comments, 2813 tables=tables, 2814 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2815 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2816 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2817 where=self._parse_where(), 2818 returning=returning or self._parse_returning(), 2819 limit=self._parse_limit(), 2820 ) 2821 2822 def _parse_update(self) -> exp.Update: 2823 comments = self._prev_comments 2824 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2825 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2826 returning = self._parse_returning() 2827 return self.expression( 2828 exp.Update, 2829 comments=comments, 2830 **{ # type: ignore 2831 "this": this, 2832 "expressions": expressions, 2833 "from": self._parse_from(joins=True), 2834 "where": self._parse_where(), 2835 "returning": returning or 
self._parse_returning(), 2836 "order": self._parse_order(), 2837 "limit": self._parse_limit(), 2838 }, 2839 ) 2840 2841 def _parse_uncache(self) -> exp.Uncache: 2842 if not self._match(TokenType.TABLE): 2843 self.raise_error("Expecting TABLE after UNCACHE") 2844 2845 return self.expression( 2846 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2847 ) 2848 2849 def _parse_cache(self) -> exp.Cache: 2850 lazy = self._match_text_seq("LAZY") 2851 self._match(TokenType.TABLE) 2852 table = self._parse_table(schema=True) 2853 2854 options = [] 2855 if self._match_text_seq("OPTIONS"): 2856 self._match_l_paren() 2857 k = self._parse_string() 2858 self._match(TokenType.EQ) 2859 v = self._parse_string() 2860 options = [k, v] 2861 self._match_r_paren() 2862 2863 self._match(TokenType.ALIAS) 2864 return self.expression( 2865 exp.Cache, 2866 this=table, 2867 lazy=lazy, 2868 options=options, 2869 expression=self._parse_select(nested=True), 2870 ) 2871 2872 def _parse_partition(self) -> t.Optional[exp.Partition]: 2873 if not self._match(TokenType.PARTITION): 2874 return None 2875 2876 return self.expression( 2877 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2878 ) 2879 2880 def _parse_value(self) -> t.Optional[exp.Tuple]: 2881 if self._match(TokenType.L_PAREN): 2882 expressions = self._parse_csv(self._parse_expression) 2883 self._match_r_paren() 2884 return self.expression(exp.Tuple, expressions=expressions) 2885 2886 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2887 expression = self._parse_expression() 2888 if expression: 2889 return self.expression(exp.Tuple, expressions=[expression]) 2890 return None 2891 2892 def _parse_projections(self) -> t.List[exp.Expression]: 2893 return self._parse_expressions() 2894 2895 def _parse_select( 2896 self, 2897 nested: bool = False, 2898 table: bool = False, 2899 parse_subquery_alias: bool = True, 2900 parse_set_operation: bool = True, 2901 ) -> t.Optional[exp.Expression]: 2902 cte = self._parse_with() 2903 2904 if cte: 2905 this = self._parse_statement() 2906 2907 if not this: 2908 self.raise_error("Failed to parse any statement following CTE") 2909 return cte 2910 2911 if "with" in this.arg_types: 2912 this.set("with", cte) 2913 else: 2914 self.raise_error(f"{this.key} does not support CTE") 2915 this = cte 2916 2917 return this 2918 2919 # duckdb supports leading with FROM x 2920 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2921 2922 if self._match(TokenType.SELECT): 2923 comments = self._prev_comments 2924 2925 hint = self._parse_hint() 2926 2927 if self._next and not self._next.token_type == TokenType.DOT: 2928 all_ = self._match(TokenType.ALL) 2929 distinct = self._match_set(self.DISTINCT_TOKENS) 2930 else: 2931 all_, distinct = None, None 2932 2933 kind = ( 2934 self._match(TokenType.ALIAS) 2935 and self._match_texts(("STRUCT", "VALUE")) 2936 and self._prev.text.upper() 2937 ) 2938 2939 if distinct: 2940 distinct = self.expression( 2941 exp.Distinct, 2942 on=self._parse_value() if self._match(TokenType.ON) else None, 2943 ) 2944 2945 if all_ and distinct: 2946 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2947 2948 limit = self._parse_limit(top=True) 2949 projections = self._parse_projections() 2950 2951 this = self.expression( 2952 exp.Select, 2953 kind=kind, 2954 hint=hint, 2955 distinct=distinct, 2956 expressions=projections, 2957 limit=limit, 2958 ) 2959 this.comments = comments 2960 2961 into = 
self._parse_into() 2962 if into: 2963 this.set("into", into) 2964 2965 if not from_: 2966 from_ = self._parse_from() 2967 2968 if from_: 2969 this.set("from", from_) 2970 2971 this = self._parse_query_modifiers(this) 2972 elif (table or nested) and self._match(TokenType.L_PAREN): 2973 if self._match(TokenType.PIVOT): 2974 this = self._parse_simplified_pivot() 2975 elif self._match(TokenType.FROM): 2976 this = exp.select("*").from_( 2977 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2978 ) 2979 else: 2980 this = ( 2981 self._parse_table() 2982 if table 2983 else self._parse_select(nested=True, parse_set_operation=False) 2984 ) 2985 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2986 2987 self._match_r_paren() 2988 2989 # We return early here so that the UNION isn't attached to the subquery by the 2990 # following call to _parse_set_operations, but instead becomes the parent node 2991 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2992 elif self._match(TokenType.VALUES, advance=False): 2993 this = self._parse_derived_table_values() 2994 elif from_: 2995 this = exp.select("*").from_(from_.this, copy=False) 2996 elif self._match(TokenType.SUMMARIZE): 2997 table = self._match(TokenType.TABLE) 2998 this = self._parse_select() or self._parse_string() or self._parse_table() 2999 return self.expression(exp.Summarize, this=this, table=table) 3000 elif self._match(TokenType.DESCRIBE): 3001 this = self._parse_describe() 3002 elif self._match_text_seq("STREAM"): 3003 this = self.expression(exp.Stream, this=self._parse_function()) 3004 else: 3005 this = None 3006 3007 return self._parse_set_operations(this) if parse_set_operation else this 3008 3009 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3010 if not skip_with_token and not self._match(TokenType.WITH): 3011 return None 3012 3013 comments = self._prev_comments 3014 recursive = self._match(TokenType.RECURSIVE) 3015 3016 expressions = [] 3017 while True: 3018 expressions.append(self._parse_cte()) 3019 3020 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3021 break 3022 else: 3023 self._match(TokenType.WITH) 3024 3025 return self.expression( 3026 exp.With, comments=comments, expressions=expressions, recursive=recursive 3027 ) 3028 3029 def _parse_cte(self) -> exp.CTE: 3030 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3031 if not alias or not alias.this: 3032 self.raise_error("Expected CTE to have alias") 3033 3034 self._match(TokenType.ALIAS) 3035 comments = self._prev_comments 3036 3037 if self._match_text_seq("NOT", "MATERIALIZED"): 3038 materialized = False 3039 elif self._match_text_seq("MATERIALIZED"): 3040 materialized = True 3041 else: 3042 materialized = None 3043 3044 return self.expression( 3045 exp.CTE, 3046 this=self._parse_wrapped(self._parse_statement), 3047 alias=alias, 3048 materialized=materialized, 3049 comments=comments, 3050 ) 3051 3052 def _parse_table_alias( 3053 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3054 ) -> t.Optional[exp.TableAlias]: 3055 any_token = self._match(TokenType.ALIAS) 3056 alias = ( 3057 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3058 or self._parse_string_as_identifier() 3059 ) 3060 3061 index = self._index 3062 if self._match(TokenType.L_PAREN): 3063 columns = self._parse_csv(self._parse_function_parameter) 3064 self._match_r_paren() if columns else self._retreat(index) 3065 else: 3066 columns = None 3067 3068 if not alias and 
not columns: 3069 return None 3070 3071 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3072 3073 # We bubble up comments from the Identifier to the TableAlias 3074 if isinstance(alias, exp.Identifier): 3075 table_alias.add_comments(alias.pop_comments()) 3076 3077 return table_alias 3078 3079 def _parse_subquery( 3080 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3081 ) -> t.Optional[exp.Subquery]: 3082 if not this: 3083 return None 3084 3085 return self.expression( 3086 exp.Subquery, 3087 this=this, 3088 pivots=self._parse_pivots(), 3089 alias=self._parse_table_alias() if parse_alias else None, 3090 sample=self._parse_table_sample(), 3091 ) 3092 3093 def _implicit_unnests_to_explicit(self, this: E) -> E: 3094 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3095 3096 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3097 for i, join in enumerate(this.args.get("joins") or []): 3098 table = join.this 3099 normalized_table = table.copy() 3100 normalized_table.meta["maybe_column"] = True 3101 normalized_table = _norm(normalized_table, dialect=self.dialect) 3102 3103 if isinstance(table, exp.Table) and not join.args.get("on"): 3104 if normalized_table.parts[0].name in refs: 3105 table_as_column = table.to_column() 3106 unnest = exp.Unnest(expressions=[table_as_column]) 3107 3108 # Table.to_column creates a parent Alias node that we want to convert to 3109 # a TableAlias and attach to the Unnest, so it matches the parser's output 3110 if isinstance(table.args.get("alias"), exp.TableAlias): 3111 table_as_column.replace(table_as_column.this) 3112 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3113 3114 table.replace(unnest) 3115 3116 refs.add(normalized_table.alias_or_name) 3117 3118 return this 3119 3120 def _parse_query_modifiers( 3121 self, this: t.Optional[exp.Expression] 3122 ) -> t.Optional[exp.Expression]: 3123 if isinstance(this, (exp.Query, exp.Table)): 3124 for join in self._parse_joins(): 3125 this.append("joins", join) 3126 for lateral in iter(self._parse_lateral, None): 3127 this.append("laterals", lateral) 3128 3129 while True: 3130 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3131 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3132 key, expression = parser(self) 3133 3134 if expression: 3135 this.set(key, expression) 3136 if key == "limit": 3137 offset = expression.args.pop("offset", None) 3138 3139 if offset: 3140 offset = exp.Offset(expression=offset) 3141 this.set("offset", offset) 3142 3143 limit_by_expressions = expression.expressions 3144 expression.set("expressions", None) 3145 offset.set("expressions", limit_by_expressions) 3146 continue 3147 break 3148 3149 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3150 this = self._implicit_unnests_to_explicit(this) 3151 3152 return this 3153 3154 def _parse_hint(self) -> t.Optional[exp.Hint]: 3155 if self._match(TokenType.HINT): 3156 hints = [] 3157 for hint in iter( 3158 lambda: self._parse_csv( 3159 lambda: self._parse_function() or self._parse_var(upper=True) 3160 ), 3161 [], 3162 ): 3163 hints.extend(hint) 3164 3165 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3166 self.raise_error("Expected */ after HINT") 3167 3168 return self.expression(exp.Hint, expressions=hints) 3169 3170 return None 3171 3172 def _parse_into(self) -> t.Optional[exp.Into]: 3173 if not self._match(TokenType.INTO): 3174 return None 3175 3176 temp = 
self._match(TokenType.TEMPORARY) 3177 unlogged = self._match_text_seq("UNLOGGED") 3178 self._match(TokenType.TABLE) 3179 3180 return self.expression( 3181 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3182 ) 3183 3184 def _parse_from( 3185 self, joins: bool = False, skip_from_token: bool = False 3186 ) -> t.Optional[exp.From]: 3187 if not skip_from_token and not self._match(TokenType.FROM): 3188 return None 3189 3190 return self.expression( 3191 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3192 ) 3193 3194 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3195 return self.expression( 3196 exp.MatchRecognizeMeasure, 3197 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3198 this=self._parse_expression(), 3199 ) 3200 3201 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3202 if not self._match(TokenType.MATCH_RECOGNIZE): 3203 return None 3204 3205 self._match_l_paren() 3206 3207 partition = self._parse_partition_by() 3208 order = self._parse_order() 3209 3210 measures = ( 3211 self._parse_csv(self._parse_match_recognize_measure) 3212 if self._match_text_seq("MEASURES") 3213 else None 3214 ) 3215 3216 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3217 rows = exp.var("ONE ROW PER MATCH") 3218 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3219 text = "ALL ROWS PER MATCH" 3220 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3221 text += " SHOW EMPTY MATCHES" 3222 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3223 text += " OMIT EMPTY MATCHES" 3224 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3225 text += " WITH UNMATCHED ROWS" 3226 rows = exp.var(text) 3227 else: 3228 rows = None 3229 3230 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3231 text = "AFTER MATCH SKIP" 3232 if self._match_text_seq("PAST", "LAST", "ROW"): 3233 text += " PAST LAST ROW" 3234 elif self._match_text_seq("TO", "NEXT", "ROW"): 3235 text += " TO NEXT ROW" 3236 elif self._match_text_seq("TO", "FIRST"): 3237 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3238 elif self._match_text_seq("TO", "LAST"): 3239 text += f" TO LAST {self._advance_any().text}" # type: ignore 3240 after = exp.var(text) 3241 else: 3242 after = None 3243 3244 if self._match_text_seq("PATTERN"): 3245 self._match_l_paren() 3246 3247 if not self._curr: 3248 self.raise_error("Expecting )", self._curr) 3249 3250 paren = 1 3251 start = self._curr 3252 3253 while self._curr and paren > 0: 3254 if self._curr.token_type == TokenType.L_PAREN: 3255 paren += 1 3256 if self._curr.token_type == TokenType.R_PAREN: 3257 paren -= 1 3258 3259 end = self._prev 3260 self._advance() 3261 3262 if paren > 0: 3263 self.raise_error("Expecting )", self._curr) 3264 3265 pattern = exp.var(self._find_sql(start, end)) 3266 else: 3267 pattern = None 3268 3269 define = ( 3270 self._parse_csv(self._parse_name_as_expression) 3271 if self._match_text_seq("DEFINE") 3272 else None 3273 ) 3274 3275 self._match_r_paren() 3276 3277 return self.expression( 3278 exp.MatchRecognize, 3279 partition_by=partition, 3280 order=order, 3281 measures=measures, 3282 rows=rows, 3283 after=after, 3284 pattern=pattern, 3285 define=define, 3286 alias=self._parse_table_alias(), 3287 ) 3288 3289 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3290 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3291 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3292 cross_apply 
= False 3293 3294 if cross_apply is not None: 3295 this = self._parse_select(table=True) 3296 view = None 3297 outer = None 3298 elif self._match(TokenType.LATERAL): 3299 this = self._parse_select(table=True) 3300 view = self._match(TokenType.VIEW) 3301 outer = self._match(TokenType.OUTER) 3302 else: 3303 return None 3304 3305 if not this: 3306 this = ( 3307 self._parse_unnest() 3308 or self._parse_function() 3309 or self._parse_id_var(any_token=False) 3310 ) 3311 3312 while self._match(TokenType.DOT): 3313 this = exp.Dot( 3314 this=this, 3315 expression=self._parse_function() or self._parse_id_var(any_token=False), 3316 ) 3317 3318 if view: 3319 table = self._parse_id_var(any_token=False) 3320 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3321 table_alias: t.Optional[exp.TableAlias] = self.expression( 3322 exp.TableAlias, this=table, columns=columns 3323 ) 3324 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3325 # We move the alias from the lateral's child node to the lateral itself 3326 table_alias = this.args["alias"].pop() 3327 else: 3328 table_alias = self._parse_table_alias() 3329 3330 return self.expression( 3331 exp.Lateral, 3332 this=this, 3333 view=view, 3334 outer=outer, 3335 alias=table_alias, 3336 cross_apply=cross_apply, 3337 ) 3338 3339 def _parse_join_parts( 3340 self, 3341 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3342 return ( 3343 self._match_set(self.JOIN_METHODS) and self._prev, 3344 self._match_set(self.JOIN_SIDES) and self._prev, 3345 self._match_set(self.JOIN_KINDS) and self._prev, 3346 ) 3347 3348 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3349 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3350 this = self._parse_column() 3351 if isinstance(this, exp.Column): 3352 return this.this 3353 return this 3354 3355 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3356 3357 def _parse_join( 3358 self, skip_join_token: bool = False, parse_bracket: bool = False 3359 ) -> t.Optional[exp.Join]: 3360 if self._match(TokenType.COMMA): 3361 return self.expression(exp.Join, this=self._parse_table()) 3362 3363 index = self._index 3364 method, side, kind = self._parse_join_parts() 3365 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3366 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3367 3368 if not skip_join_token and not join: 3369 self._retreat(index) 3370 kind = None 3371 method = None 3372 side = None 3373 3374 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3375 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3376 3377 if not skip_join_token and not join and not outer_apply and not cross_apply: 3378 return None 3379 3380 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3381 3382 if method: 3383 kwargs["method"] = method.text 3384 if side: 3385 kwargs["side"] = side.text 3386 if kind: 3387 kwargs["kind"] = kind.text 3388 if hint: 3389 kwargs["hint"] = hint 3390 3391 if self._match(TokenType.MATCH_CONDITION): 3392 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3393 3394 if self._match(TokenType.ON): 3395 kwargs["on"] = self._parse_assignment() 3396 elif self._match(TokenType.USING): 3397 kwargs["using"] = self._parse_using_identifiers() 3398 elif ( 3399 not (outer_apply or cross_apply) 3400 and not isinstance(kwargs["this"], exp.Unnest) 3401 and not (kind 
and kind.token_type == TokenType.CROSS) 3402 ): 3403 index = self._index 3404 joins: t.Optional[list] = list(self._parse_joins()) 3405 3406 if joins and self._match(TokenType.ON): 3407 kwargs["on"] = self._parse_assignment() 3408 elif joins and self._match(TokenType.USING): 3409 kwargs["using"] = self._parse_using_identifiers() 3410 else: 3411 joins = None 3412 self._retreat(index) 3413 3414 kwargs["this"].set("joins", joins if joins else None) 3415 3416 comments = [c for token in (method, side, kind) if token for c in token.comments] 3417 return self.expression(exp.Join, comments=comments, **kwargs) 3418 3419 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3420 this = self._parse_assignment() 3421 3422 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3423 return this 3424 3425 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3426 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3427 3428 return this 3429 3430 def _parse_index_params(self) -> exp.IndexParameters: 3431 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3432 3433 if self._match(TokenType.L_PAREN, advance=False): 3434 columns = self._parse_wrapped_csv(self._parse_with_operator) 3435 else: 3436 columns = None 3437 3438 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3439 partition_by = self._parse_partition_by() 3440 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3441 tablespace = ( 3442 self._parse_var(any_token=True) 3443 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3444 else None 3445 ) 3446 where = self._parse_where() 3447 3448 on = self._parse_field() if self._match(TokenType.ON) else None 3449 3450 return self.expression( 3451 exp.IndexParameters, 3452 using=using, 3453 columns=columns, 3454 include=include, 3455 partition_by=partition_by, 3456 where=where, 3457 with_storage=with_storage, 3458 tablespace=tablespace, 3459 on=on, 3460 ) 3461 3462 def _parse_index( 3463 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3464 ) -> t.Optional[exp.Index]: 3465 if index or anonymous: 3466 unique = None 3467 primary = None 3468 amp = None 3469 3470 self._match(TokenType.ON) 3471 self._match(TokenType.TABLE) # hive 3472 table = self._parse_table_parts(schema=True) 3473 else: 3474 unique = self._match(TokenType.UNIQUE) 3475 primary = self._match_text_seq("PRIMARY") 3476 amp = self._match_text_seq("AMP") 3477 3478 if not self._match(TokenType.INDEX): 3479 return None 3480 3481 index = self._parse_id_var() 3482 table = None 3483 3484 params = self._parse_index_params() 3485 3486 return self.expression( 3487 exp.Index, 3488 this=index, 3489 table=table, 3490 unique=unique, 3491 primary=primary, 3492 amp=amp, 3493 params=params, 3494 ) 3495 3496 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3497 hints: t.List[exp.Expression] = [] 3498 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3499 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3500 hints.append( 3501 self.expression( 3502 exp.WithTableHint, 3503 expressions=self._parse_csv( 3504 lambda: self._parse_function() or self._parse_var(any_token=True) 3505 ), 3506 ) 3507 ) 3508 self._match_r_paren() 3509 else: 3510 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3511 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3512 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3513 
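# Illustrative note (added): this loop consumes MySQL-style index hints such as
# USE INDEX (i1, i2) or FORCE INDEX FOR JOIN (i1). A rough sketch of the result,
# assuming the mysql dialect:
#
#   import sqlglot
#   select = sqlglot.parse_one("SELECT * FROM t USE INDEX (i1)", read="mysql")
#   # select.args["from"].this.args["hints"] -> [IndexTableHint(this='USE', ...)]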
3514 self._match_set((TokenType.INDEX, TokenType.KEY)) 3515 if self._match(TokenType.FOR): 3516 hint.set("target", self._advance_any() and self._prev.text.upper()) 3517 3518 hint.set("expressions", self._parse_wrapped_id_vars()) 3519 hints.append(hint) 3520 3521 return hints or None 3522 3523 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3524 return ( 3525 (not schema and self._parse_function(optional_parens=False)) 3526 or self._parse_id_var(any_token=False) 3527 or self._parse_string_as_identifier() 3528 or self._parse_placeholder() 3529 ) 3530 3531 def _parse_table_parts( 3532 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3533 ) -> exp.Table: 3534 catalog = None 3535 db = None 3536 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3537 3538 while self._match(TokenType.DOT): 3539 if catalog: 3540 # This allows nesting the table in arbitrarily many dot expressions if needed 3541 table = self.expression( 3542 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3543 ) 3544 else: 3545 catalog = db 3546 db = table 3547 # "" used for tsql FROM a..b case 3548 table = self._parse_table_part(schema=schema) or "" 3549 3550 if ( 3551 wildcard 3552 and self._is_connected() 3553 and (isinstance(table, exp.Identifier) or not table) 3554 and self._match(TokenType.STAR) 3555 ): 3556 if isinstance(table, exp.Identifier): 3557 table.args["this"] += "*" 3558 else: 3559 table = exp.Identifier(this="*") 3560 3561 # We bubble up comments from the Identifier to the Table 3562 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3563 3564 if is_db_reference: 3565 catalog = db 3566 db = table 3567 table = None 3568 3569 if not table and not is_db_reference: 3570 self.raise_error(f"Expected table name but got {self._curr}") 3571 if not db and is_db_reference: 3572 self.raise_error(f"Expected database name but got {self._curr}") 3573 3574 table = self.expression( 3575 exp.Table, 3576 comments=comments, 3577 this=table, 3578 db=db, 3579 catalog=catalog, 3580 ) 3581 3582 changes = self._parse_changes() 3583 if changes: 3584 table.set("changes", changes) 3585 3586 at_before = self._parse_historical_data() 3587 if at_before: 3588 table.set("when", at_before) 3589 3590 pivots = self._parse_pivots() 3591 if pivots: 3592 table.set("pivots", pivots) 3593 3594 return table 3595 3596 def _parse_table( 3597 self, 3598 schema: bool = False, 3599 joins: bool = False, 3600 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3601 parse_bracket: bool = False, 3602 is_db_reference: bool = False, 3603 parse_partition: bool = False, 3604 ) -> t.Optional[exp.Expression]: 3605 lateral = self._parse_lateral() 3606 if lateral: 3607 return lateral 3608 3609 unnest = self._parse_unnest() 3610 if unnest: 3611 return unnest 3612 3613 values = self._parse_derived_table_values() 3614 if values: 3615 return values 3616 3617 subquery = self._parse_select(table=True) 3618 if subquery: 3619 if not subquery.args.get("pivots"): 3620 subquery.set("pivots", self._parse_pivots()) 3621 return subquery 3622 3623 bracket = parse_bracket and self._parse_bracket(None) 3624 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3625 3626 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3627 self._parse_table 3628 ) 3629 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3630 3631 only = self._match(TokenType.ONLY) 3632 3633 this = 
t.cast( 3634 exp.Expression, 3635 bracket 3636 or rows_from 3637 or self._parse_bracket( 3638 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3639 ), 3640 ) 3641 3642 if only: 3643 this.set("only", only) 3644 3645 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3646 self._match_text_seq("*") 3647 3648 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3649 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3650 this.set("partition", self._parse_partition()) 3651 3652 if schema: 3653 return self._parse_schema(this=this) 3654 3655 version = self._parse_version() 3656 3657 if version: 3658 this.set("version", version) 3659 3660 if self.dialect.ALIAS_POST_TABLESAMPLE: 3661 this.set("sample", self._parse_table_sample()) 3662 3663 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3664 if alias: 3665 this.set("alias", alias) 3666 3667 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3668 return self.expression( 3669 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3670 ) 3671 3672 this.set("hints", self._parse_table_hints()) 3673 3674 if not this.args.get("pivots"): 3675 this.set("pivots", self._parse_pivots()) 3676 3677 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3678 this.set("sample", self._parse_table_sample()) 3679 3680 if joins: 3681 for join in self._parse_joins(): 3682 this.append("joins", join) 3683 3684 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3685 this.set("ordinality", True) 3686 this.set("alias", self._parse_table_alias()) 3687 3688 return this 3689 3690 def _parse_version(self) -> t.Optional[exp.Version]: 3691 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3692 this = "TIMESTAMP" 3693 elif self._match(TokenType.VERSION_SNAPSHOT): 3694 this = "VERSION" 3695 else: 3696 return None 3697 3698 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3699 kind = self._prev.text.upper() 3700 start = self._parse_bitwise() 3701 self._match_texts(("TO", "AND")) 3702 end = self._parse_bitwise() 3703 expression: t.Optional[exp.Expression] = self.expression( 3704 exp.Tuple, expressions=[start, end] 3705 ) 3706 elif self._match_text_seq("CONTAINED", "IN"): 3707 kind = "CONTAINED IN" 3708 expression = self.expression( 3709 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3710 ) 3711 elif self._match(TokenType.ALL): 3712 kind = "ALL" 3713 expression = None 3714 else: 3715 self._match_text_seq("AS", "OF") 3716 kind = "AS OF" 3717 expression = self._parse_type() 3718 3719 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3720 3721 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3722 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3723 index = self._index 3724 historical_data = None 3725 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3726 this = self._prev.text.upper() 3727 kind = ( 3728 self._match(TokenType.L_PAREN) 3729 and self._match_texts(self.HISTORICAL_DATA_KIND) 3730 and self._prev.text.upper() 3731 ) 3732 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3733 3734 if expression: 3735 self._match_r_paren() 3736 historical_data = self.expression( 3737 exp.HistoricalData, this=this, kind=kind, expression=expression 3738 ) 3739 else: 3740 self._retreat(index) 3741 3742 return historical_data 3743 3744 def _parse_changes(self) -> t.Optional[exp.Changes]: 3745 if not 
self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3746 return None 3747 3748 information = self._parse_var(any_token=True) 3749 self._match_r_paren() 3750 3751 return self.expression( 3752 exp.Changes, 3753 information=information, 3754 at_before=self._parse_historical_data(), 3755 end=self._parse_historical_data(), 3756 ) 3757 3758 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3759 if not self._match(TokenType.UNNEST): 3760 return None 3761 3762 expressions = self._parse_wrapped_csv(self._parse_equality) 3763 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3764 3765 alias = self._parse_table_alias() if with_alias else None 3766 3767 if alias: 3768 if self.dialect.UNNEST_COLUMN_ONLY: 3769 if alias.args.get("columns"): 3770 self.raise_error("Unexpected extra column alias in unnest.") 3771 3772 alias.set("columns", [alias.this]) 3773 alias.set("this", None) 3774 3775 columns = alias.args.get("columns") or [] 3776 if offset and len(expressions) < len(columns): 3777 offset = columns.pop() 3778 3779 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3780 self._match(TokenType.ALIAS) 3781 offset = self._parse_id_var( 3782 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3783 ) or exp.to_identifier("offset") 3784 3785 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3786 3787 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3788 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3789 if not is_derived and not ( 3790 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3791 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3792 ): 3793 return None 3794 3795 expressions = self._parse_csv(self._parse_value) 3796 alias = self._parse_table_alias() 3797 3798 if is_derived: 3799 self._match_r_paren() 3800 3801 return self.expression( 3802 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3803 ) 3804 3805 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3806 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3807 as_modifier and self._match_text_seq("USING", "SAMPLE") 3808 ): 3809 return None 3810 3811 bucket_numerator = None 3812 bucket_denominator = None 3813 bucket_field = None 3814 percent = None 3815 size = None 3816 seed = None 3817 3818 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3819 matched_l_paren = self._match(TokenType.L_PAREN) 3820 3821 if self.TABLESAMPLE_CSV: 3822 num = None 3823 expressions = self._parse_csv(self._parse_primary) 3824 else: 3825 expressions = None 3826 num = ( 3827 self._parse_factor() 3828 if self._match(TokenType.NUMBER, advance=False) 3829 else self._parse_primary() or self._parse_placeholder() 3830 ) 3831 3832 if self._match_text_seq("BUCKET"): 3833 bucket_numerator = self._parse_number() 3834 self._match_text_seq("OUT", "OF") 3835 bucket_denominator = bucket_denominator = self._parse_number() 3836 self._match(TokenType.ON) 3837 bucket_field = self._parse_field() 3838 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3839 percent = num 3840 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3841 size = num 3842 else: 3843 percent = num 3844 3845 if matched_l_paren: 3846 self._match_r_paren() 3847 3848 if self._match(TokenType.L_PAREN): 3849 method = self._parse_var(upper=True) 3850 seed = self._match(TokenType.COMMA) and self._parse_number() 3851 
self._match_r_paren() 3852 elif self._match_texts(("SEED", "REPEATABLE")): 3853 seed = self._parse_wrapped(self._parse_number) 3854 3855 if not method and self.DEFAULT_SAMPLING_METHOD: 3856 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3857 3858 return self.expression( 3859 exp.TableSample, 3860 expressions=expressions, 3861 method=method, 3862 bucket_numerator=bucket_numerator, 3863 bucket_denominator=bucket_denominator, 3864 bucket_field=bucket_field, 3865 percent=percent, 3866 size=size, 3867 seed=seed, 3868 ) 3869 3870 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3871 return list(iter(self._parse_pivot, None)) or None 3872 3873 def _parse_joins(self) -> t.Iterator[exp.Join]: 3874 return iter(self._parse_join, None) 3875 3876 # https://duckdb.org/docs/sql/statements/pivot 3877 def _parse_simplified_pivot(self) -> exp.Pivot: 3878 def _parse_on() -> t.Optional[exp.Expression]: 3879 this = self._parse_bitwise() 3880 return self._parse_in(this) if self._match(TokenType.IN) else this 3881 3882 this = self._parse_table() 3883 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3884 using = self._match(TokenType.USING) and self._parse_csv( 3885 lambda: self._parse_alias(self._parse_function()) 3886 ) 3887 group = self._parse_group() 3888 return self.expression( 3889 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3890 ) 3891 3892 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3893 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3894 this = self._parse_select_or_expression() 3895 3896 self._match(TokenType.ALIAS) 3897 alias = self._parse_bitwise() 3898 if alias: 3899 if isinstance(alias, exp.Column) and not alias.db: 3900 alias = alias.this 3901 return self.expression(exp.PivotAlias, this=this, alias=alias) 3902 3903 return this 3904 3905 value = self._parse_column() 3906 3907 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3908 self.raise_error("Expecting IN (") 3909 3910 if self._match(TokenType.ANY): 3911 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3912 else: 3913 exprs = self._parse_csv(_parse_aliased_expression) 3914 3915 self._match_r_paren() 3916 return self.expression(exp.In, this=value, expressions=exprs) 3917 3918 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3919 index = self._index 3920 include_nulls = None 3921 3922 if self._match(TokenType.PIVOT): 3923 unpivot = False 3924 elif self._match(TokenType.UNPIVOT): 3925 unpivot = True 3926 3927 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3928 if self._match_text_seq("INCLUDE", "NULLS"): 3929 include_nulls = True 3930 elif self._match_text_seq("EXCLUDE", "NULLS"): 3931 include_nulls = False 3932 else: 3933 return None 3934 3935 expressions = [] 3936 3937 if not self._match(TokenType.L_PAREN): 3938 self._retreat(index) 3939 return None 3940 3941 if unpivot: 3942 expressions = self._parse_csv(self._parse_column) 3943 else: 3944 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3945 3946 if not expressions: 3947 self.raise_error("Failed to parse PIVOT's aggregation list") 3948 3949 if not self._match(TokenType.FOR): 3950 self.raise_error("Expecting FOR") 3951 3952 field = self._parse_pivot_in() 3953 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3954 self._parse_bitwise 3955 ) 3956 3957 self._match_r_paren() 3958 3959 pivot = self.expression( 3960 exp.Pivot, 3961 expressions=expressions, 
3962 field=field, 3963 unpivot=unpivot, 3964 include_nulls=include_nulls, 3965 default_on_null=default_on_null, 3966 ) 3967 3968 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3969 pivot.set("alias", self._parse_table_alias()) 3970 3971 if not unpivot: 3972 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3973 3974 columns: t.List[exp.Expression] = [] 3975 for fld in pivot.args["field"].expressions: 3976 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3977 for name in names: 3978 if self.PREFIXED_PIVOT_COLUMNS: 3979 name = f"{name}_{field_name}" if name else field_name 3980 else: 3981 name = f"{field_name}_{name}" if name else field_name 3982 3983 columns.append(exp.to_identifier(name)) 3984 3985 pivot.set("columns", columns) 3986 3987 return pivot 3988 3989 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3990 return [agg.alias for agg in aggregations] 3991 3992 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3993 if not skip_where_token and not self._match(TokenType.PREWHERE): 3994 return None 3995 3996 return self.expression( 3997 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3998 ) 3999 4000 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4001 if not skip_where_token and not self._match(TokenType.WHERE): 4002 return None 4003 4004 return self.expression( 4005 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4006 ) 4007 4008 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4009 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4010 return None 4011 4012 elements: t.Dict[str, t.Any] = defaultdict(list) 4013 4014 if self._match(TokenType.ALL): 4015 elements["all"] = True 4016 elif self._match(TokenType.DISTINCT): 4017 elements["all"] = False 4018 4019 while True: 4020 index = self._index 4021 4022 elements["expressions"].extend( 4023 self._parse_csv( 4024 lambda: None 4025 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4026 else self._parse_assignment() 4027 ) 4028 ) 4029 4030 before_with_index = self._index 4031 with_prefix = self._match(TokenType.WITH) 4032 4033 if self._match(TokenType.ROLLUP): 4034 elements["rollup"].append( 4035 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4036 ) 4037 elif self._match(TokenType.CUBE): 4038 elements["cube"].append( 4039 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4040 ) 4041 elif self._match(TokenType.GROUPING_SETS): 4042 elements["grouping_sets"].append( 4043 self.expression( 4044 exp.GroupingSets, 4045 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4046 ) 4047 ) 4048 elif self._match_text_seq("TOTALS"): 4049 elements["totals"] = True # type: ignore 4050 4051 if before_with_index <= self._index <= before_with_index + 1: 4052 self._retreat(before_with_index) 4053 break 4054 4055 if index == self._index: 4056 break 4057 4058 return self.expression(exp.Group, **elements) # type: ignore 4059 4060 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4061 return self.expression( 4062 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4063 ) 4064 4065 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4066 if self._match(TokenType.L_PAREN): 4067 grouping_set = self._parse_csv(self._parse_column) 4068 
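# Illustrative note (not part of the original source): each parenthesized group
# in e.g. "GROUP BY GROUPING SETS ((a, b), (a), ())" reaches this point and is
# wrapped into an exp.Tuple of its columns (empty for "()"); a bare column such
# as "a" instead falls through to the _parse_column() call below.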
self._match_r_paren() 4069 return self.expression(exp.Tuple, expressions=grouping_set) 4070 4071 return self._parse_column() 4072 4073 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4074 if not skip_having_token and not self._match(TokenType.HAVING): 4075 return None 4076 return self.expression(exp.Having, this=self._parse_assignment()) 4077 4078 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4079 if not self._match(TokenType.QUALIFY): 4080 return None 4081 return self.expression(exp.Qualify, this=self._parse_assignment()) 4082 4083 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4084 if skip_start_token: 4085 start = None 4086 elif self._match(TokenType.START_WITH): 4087 start = self._parse_assignment() 4088 else: 4089 return None 4090 4091 self._match(TokenType.CONNECT_BY) 4092 nocycle = self._match_text_seq("NOCYCLE") 4093 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4094 exp.Prior, this=self._parse_bitwise() 4095 ) 4096 connect = self._parse_assignment() 4097 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4098 4099 if not start and self._match(TokenType.START_WITH): 4100 start = self._parse_assignment() 4101 4102 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4103 4104 def _parse_name_as_expression(self) -> exp.Alias: 4105 return self.expression( 4106 exp.Alias, 4107 alias=self._parse_id_var(any_token=True), 4108 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4109 ) 4110 4111 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4112 if self._match_text_seq("INTERPOLATE"): 4113 return self._parse_wrapped_csv(self._parse_name_as_expression) 4114 return None 4115 4116 def _parse_order( 4117 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4118 ) -> t.Optional[exp.Expression]: 4119 siblings = None 4120 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4121 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4122 return this 4123 4124 siblings = True 4125 4126 return self.expression( 4127 exp.Order, 4128 this=this, 4129 expressions=self._parse_csv(self._parse_ordered), 4130 siblings=siblings, 4131 ) 4132 4133 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4134 if not self._match(token): 4135 return None 4136 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4137 4138 def _parse_ordered( 4139 self, parse_method: t.Optional[t.Callable] = None 4140 ) -> t.Optional[exp.Ordered]: 4141 this = parse_method() if parse_method else self._parse_assignment() 4142 if not this: 4143 return None 4144 4145 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4146 this = exp.var("ALL") 4147 4148 asc = self._match(TokenType.ASC) 4149 desc = self._match(TokenType.DESC) or (asc and False) 4150 4151 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4152 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4153 4154 nulls_first = is_nulls_first or False 4155 explicitly_null_ordered = is_nulls_first or is_nulls_last 4156 4157 if ( 4158 not explicitly_null_ordered 4159 and ( 4160 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4161 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4162 ) 4163 and self.dialect.NULL_ORDERING != "nulls_are_last" 4164 ): 4165 nulls_first = True 4166 4167 if self._match_text_seq("WITH", "FILL"): 4168 with_fill = self.expression( 4169 exp.WithFill, 4170 **{ # 
type: ignore 4171 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4172 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4173 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4174 "interpolate": self._parse_interpolate(), 4175 }, 4176 ) 4177 else: 4178 with_fill = None 4179 4180 return self.expression( 4181 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4182 ) 4183 4184 def _parse_limit( 4185 self, 4186 this: t.Optional[exp.Expression] = None, 4187 top: bool = False, 4188 skip_limit_token: bool = False, 4189 ) -> t.Optional[exp.Expression]: 4190 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4191 comments = self._prev_comments 4192 if top: 4193 limit_paren = self._match(TokenType.L_PAREN) 4194 expression = self._parse_term() if limit_paren else self._parse_number() 4195 4196 if limit_paren: 4197 self._match_r_paren() 4198 else: 4199 expression = self._parse_term() 4200 4201 if self._match(TokenType.COMMA): 4202 offset = expression 4203 expression = self._parse_term() 4204 else: 4205 offset = None 4206 4207 limit_exp = self.expression( 4208 exp.Limit, 4209 this=this, 4210 expression=expression, 4211 offset=offset, 4212 comments=comments, 4213 expressions=self._parse_limit_by(), 4214 ) 4215 4216 return limit_exp 4217 4218 if self._match(TokenType.FETCH): 4219 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4220 direction = self._prev.text.upper() if direction else "FIRST" 4221 4222 count = self._parse_field(tokens=self.FETCH_TOKENS) 4223 percent = self._match(TokenType.PERCENT) 4224 4225 self._match_set((TokenType.ROW, TokenType.ROWS)) 4226 4227 only = self._match_text_seq("ONLY") 4228 with_ties = self._match_text_seq("WITH", "TIES") 4229 4230 if only and with_ties: 4231 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4232 4233 return self.expression( 4234 exp.Fetch, 4235 direction=direction, 4236 count=count, 4237 percent=percent, 4238 with_ties=with_ties, 4239 ) 4240 4241 return this 4242 4243 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4244 if not self._match(TokenType.OFFSET): 4245 return this 4246 4247 count = self._parse_term() 4248 self._match_set((TokenType.ROW, TokenType.ROWS)) 4249 4250 return self.expression( 4251 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4252 ) 4253 4254 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4255 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4256 4257 def _parse_locks(self) -> t.List[exp.Lock]: 4258 locks = [] 4259 while True: 4260 if self._match_text_seq("FOR", "UPDATE"): 4261 update = True 4262 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4263 "LOCK", "IN", "SHARE", "MODE" 4264 ): 4265 update = False 4266 else: 4267 break 4268 4269 expressions = None 4270 if self._match_text_seq("OF"): 4271 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4272 4273 wait: t.Optional[bool | exp.Expression] = None 4274 if self._match_text_seq("NOWAIT"): 4275 wait = True 4276 elif self._match_text_seq("WAIT"): 4277 wait = self._parse_primary() 4278 elif self._match_text_seq("SKIP", "LOCKED"): 4279 wait = False 4280 4281 locks.append( 4282 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4283 ) 4284 4285 return locks 4286 4287 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4288 
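# Illustrative note (not part of the original source): the loop below folds set
# operators left-associatively, so "SELECT 1 UNION SELECT 2 UNION ALL SELECT 3"
# parses roughly as Union(this=Union(this=select1, expression=select2),
# expression=select3, distinct=False), with `distinct` defaulting per dialect
# when neither DISTINCT nor ALL is written.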
while this and self._match_set(self.SET_OPERATIONS): 4289 token_type = self._prev.token_type 4290 4291 if token_type == TokenType.UNION: 4292 operation: t.Type[exp.SetOperation] = exp.Union 4293 elif token_type == TokenType.EXCEPT: 4294 operation = exp.Except 4295 else: 4296 operation = exp.Intersect 4297 4298 comments = self._prev.comments 4299 4300 if self._match(TokenType.DISTINCT): 4301 distinct: t.Optional[bool] = True 4302 elif self._match(TokenType.ALL): 4303 distinct = False 4304 else: 4305 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4306 if distinct is None: 4307 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4308 4309 by_name = self._match_text_seq("BY", "NAME") 4310 expression = self._parse_select(nested=True, parse_set_operation=False) 4311 4312 this = self.expression( 4313 operation, 4314 comments=comments, 4315 this=this, 4316 distinct=distinct, 4317 by_name=by_name, 4318 expression=expression, 4319 ) 4320 4321 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4322 expression = this.expression 4323 4324 if expression: 4325 for arg in self.SET_OP_MODIFIERS: 4326 expr = expression.args.get(arg) 4327 if expr: 4328 this.set(arg, expr.pop()) 4329 4330 return this 4331 4332 def _parse_expression(self) -> t.Optional[exp.Expression]: 4333 return self._parse_alias(self._parse_assignment()) 4334 4335 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4336 this = self._parse_disjunction() 4337 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4338 # This allows us to parse <non-identifier token> := <expr> 4339 this = exp.column( 4340 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4341 ) 4342 4343 while self._match_set(self.ASSIGNMENT): 4344 this = self.expression( 4345 self.ASSIGNMENT[self._prev.token_type], 4346 this=this, 4347 comments=self._prev_comments, 4348 expression=self._parse_assignment(), 4349 ) 4350 4351 return this 4352 4353 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4354 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4355 4356 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4357 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4358 4359 def _parse_equality(self) -> t.Optional[exp.Expression]: 4360 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4361 4362 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4363 return self._parse_tokens(self._parse_range, self.COMPARISON) 4364 4365 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4366 this = this or self._parse_bitwise() 4367 negate = self._match(TokenType.NOT) 4368 4369 if self._match_set(self.RANGE_PARSERS): 4370 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4371 if not expression: 4372 return this 4373 4374 this = expression 4375 elif self._match(TokenType.ISNULL): 4376 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4377 4378 # Postgres supports ISNULL and NOTNULL for conditions. 
4379 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4380 if self._match(TokenType.NOTNULL): 4381 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4382 this = self.expression(exp.Not, this=this) 4383 4384 if negate: 4385 this = self._negate_range(this) 4386 4387 if self._match(TokenType.IS): 4388 this = self._parse_is(this) 4389 4390 return this 4391 4392 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4393 if not this: 4394 return this 4395 4396 return self.expression(exp.Not, this=this) 4397 4398 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4399 index = self._index - 1 4400 negate = self._match(TokenType.NOT) 4401 4402 if self._match_text_seq("DISTINCT", "FROM"): 4403 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4404 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4405 4406 if self._match(TokenType.JSON): 4407 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4408 4409 if self._match_text_seq("WITH"): 4410 _with = True 4411 elif self._match_text_seq("WITHOUT"): 4412 _with = False 4413 else: 4414 _with = None 4415 4416 unique = self._match(TokenType.UNIQUE) 4417 self._match_text_seq("KEYS") 4418 expression: t.Optional[exp.Expression] = self.expression( 4419 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4420 ) 4421 else: 4422 expression = self._parse_primary() or self._parse_null() 4423 if not expression: 4424 self._retreat(index) 4425 return None 4426 4427 this = self.expression(exp.Is, this=this, expression=expression) 4428 return self.expression(exp.Not, this=this) if negate else this 4429 4430 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4431 unnest = self._parse_unnest(with_alias=False) 4432 if unnest: 4433 this = self.expression(exp.In, this=this, unnest=unnest) 4434 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4435 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4436 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4437 4438 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4439 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4440 else: 4441 this = self.expression(exp.In, this=this, expressions=expressions) 4442 4443 if matched_l_paren: 4444 self._match_r_paren(this) 4445 elif not self._match(TokenType.R_BRACKET, expression=this): 4446 self.raise_error("Expecting ]") 4447 else: 4448 this = self.expression(exp.In, this=this, field=self._parse_field()) 4449 4450 return this 4451 4452 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4453 low = self._parse_bitwise() 4454 self._match(TokenType.AND) 4455 high = self._parse_bitwise() 4456 return self.expression(exp.Between, this=this, low=low, high=high) 4457 4458 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4459 if not self._match(TokenType.ESCAPE): 4460 return this 4461 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4462 4463 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4464 index = self._index 4465 4466 if not self._match(TokenType.INTERVAL) and match_interval: 4467 return None 4468 4469 if self._match(TokenType.STRING, advance=False): 4470 this = self._parse_primary() 4471 else: 4472 this = self._parse_term() 4473 4474 if not this 
or ( 4475 isinstance(this, exp.Column) 4476 and not this.table 4477 and not this.this.quoted 4478 and this.name.upper() == "IS" 4479 ): 4480 self._retreat(index) 4481 return None 4482 4483 unit = self._parse_function() or ( 4484 not self._match(TokenType.ALIAS, advance=False) 4485 and self._parse_var(any_token=True, upper=True) 4486 ) 4487 4488 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4489 # each INTERVAL expression into this canonical form so it's easy to transpile 4490 if this and this.is_number: 4491 this = exp.Literal.string(this.to_py()) 4492 elif this and this.is_string: 4493 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4494 if len(parts) == 1: 4495 if unit: 4496 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4497 self._retreat(self._index - 1) 4498 4499 this = exp.Literal.string(parts[0][0]) 4500 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4501 4502 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4503 unit = self.expression( 4504 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4505 ) 4506 4507 interval = self.expression(exp.Interval, this=this, unit=unit) 4508 4509 index = self._index 4510 self._match(TokenType.PLUS) 4511 4512 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4513 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4514 return self.expression( 4515 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4516 ) 4517 4518 self._retreat(index) 4519 return interval 4520 4521 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4522 this = self._parse_term() 4523 4524 while True: 4525 if self._match_set(self.BITWISE): 4526 this = self.expression( 4527 self.BITWISE[self._prev.token_type], 4528 this=this, 4529 expression=self._parse_term(), 4530 ) 4531 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4532 this = self.expression( 4533 exp.DPipe, 4534 this=this, 4535 expression=self._parse_term(), 4536 safe=not self.dialect.STRICT_STRING_CONCAT, 4537 ) 4538 elif self._match(TokenType.DQMARK): 4539 this = self.expression( 4540 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4541 ) 4542 elif self._match_pair(TokenType.LT, TokenType.LT): 4543 this = self.expression( 4544 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4545 ) 4546 elif self._match_pair(TokenType.GT, TokenType.GT): 4547 this = self.expression( 4548 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4549 ) 4550 else: 4551 break 4552 4553 return this 4554 4555 def _parse_term(self) -> t.Optional[exp.Expression]: 4556 this = self._parse_factor() 4557 4558 while self._match_set(self.TERM): 4559 klass = self.TERM[self._prev.token_type] 4560 comments = self._prev_comments 4561 expression = self._parse_factor() 4562 4563 this = self.expression(klass, this=this, comments=comments, expression=expression) 4564 4565 if isinstance(this, exp.Collate): 4566 expr = this.expression 4567 4568 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4569 # fallback to Identifier / Var 4570 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4571 ident = expr.this 4572 if isinstance(ident, exp.Identifier): 4573 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4574 4575 return this 4576 4577 def _parse_factor(self) -> t.Optional[exp.Expression]: 4578 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4579 this = parse_method() 4580 4581 while self._match_set(self.FACTOR): 4582 klass = self.FACTOR[self._prev.token_type] 4583 comments = self._prev_comments 4584 expression = parse_method() 4585 4586 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4587 self._retreat(self._index - 1) 4588 return this 4589 4590 this = self.expression(klass, this=this, comments=comments, expression=expression) 4591 4592 if isinstance(this, exp.Div): 4593 this.args["typed"] = self.dialect.TYPED_DIVISION 4594 this.args["safe"] = self.dialect.SAFE_DIVISION 4595 4596 return this 4597 4598 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4599 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4600 4601 def _parse_unary(self) -> t.Optional[exp.Expression]: 4602 if self._match_set(self.UNARY_PARSERS): 4603 return self.UNARY_PARSERS[self._prev.token_type](self) 4604 return self._parse_at_time_zone(self._parse_type()) 4605 4606 def _parse_type( 4607 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4608 ) -> t.Optional[exp.Expression]: 4609 interval = parse_interval and self._parse_interval() 4610 if interval: 4611 return interval 4612 4613 index = self._index 4614 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4615 4616 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4617 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4618 if isinstance(data_type, exp.Cast): 4619 # This constructor can contain ops directly after it, for instance struct unnesting: 4620 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4621 return self._parse_column_ops(data_type) 4622 4623 if data_type: 4624 index2 = self._index 4625 this = self._parse_primary() 4626 4627 if isinstance(this, exp.Literal): 4628 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4629 if parser: 4630 return parser(self, this, data_type) 4631 4632 return self.expression(exp.Cast, this=this, to=data_type) 4633 4634 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4635 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4636 # 4637 # If the index difference here is greater than 1, that means the parser itself must have 4638 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4639 # 4640 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4641 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4642 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4643 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4644 # 4645 # In these cases, we don't really want to return the converted type, but instead retreat 4646 # and try to parse a Column or Identifier in the section below.
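# Illustrative example (not part of the original source): if the SQL literally
# spells out "DECIMAL(38, 0)", more than one token was consumed here
# (index2 - index > 1) and the parsed type is kept; if it contains a bare
# "DECIMAL" that a TYPE_CONVERTERS callable expanded to DECIMAL(38, 0), only
# the keyword itself was consumed, so we retreat and re-parse it as a column
# or identifier instead.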
4647 if data_type.expressions and index2 - index > 1: 4648 self._retreat(index2) 4649 return self._parse_column_ops(data_type) 4650 4651 self._retreat(index) 4652 4653 if fallback_to_identifier: 4654 return self._parse_id_var() 4655 4656 this = self._parse_column() 4657 return this and self._parse_column_ops(this) 4658 4659 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4660 this = self._parse_type() 4661 if not this: 4662 return None 4663 4664 if isinstance(this, exp.Column) and not this.table: 4665 this = exp.var(this.name.upper()) 4666 4667 return self.expression( 4668 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4669 ) 4670 4671 def _parse_types( 4672 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4673 ) -> t.Optional[exp.Expression]: 4674 index = self._index 4675 4676 this: t.Optional[exp.Expression] = None 4677 prefix = self._match_text_seq("SYSUDTLIB", ".") 4678 4679 if not self._match_set(self.TYPE_TOKENS): 4680 identifier = allow_identifiers and self._parse_id_var( 4681 any_token=False, tokens=(TokenType.VAR,) 4682 ) 4683 if isinstance(identifier, exp.Identifier): 4684 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4685 4686 if len(tokens) != 1: 4687 self.raise_error("Unexpected identifier", self._prev) 4688 4689 if tokens[0].token_type in self.TYPE_TOKENS: 4690 self._prev = tokens[0] 4691 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4692 type_name = identifier.name 4693 4694 while self._match(TokenType.DOT): 4695 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4696 4697 this = exp.DataType.build(type_name, udt=True) 4698 else: 4699 self._retreat(self._index - 1) 4700 return None 4701 else: 4702 return None 4703 4704 type_token = self._prev.token_type 4705 4706 if type_token == TokenType.PSEUDO_TYPE: 4707 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4708 4709 if type_token == TokenType.OBJECT_IDENTIFIER: 4710 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4711 4712 # https://materialize.com/docs/sql/types/map/ 4713 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4714 key_type = self._parse_types( 4715 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4716 ) 4717 if not self._match(TokenType.FARROW): 4718 self._retreat(index) 4719 return None 4720 4721 value_type = self._parse_types( 4722 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4723 ) 4724 if not self._match(TokenType.R_BRACKET): 4725 self._retreat(index) 4726 return None 4727 4728 return exp.DataType( 4729 this=exp.DataType.Type.MAP, 4730 expressions=[key_type, value_type], 4731 nested=True, 4732 prefix=prefix, 4733 ) 4734 4735 nested = type_token in self.NESTED_TYPE_TOKENS 4736 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4737 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4738 expressions = None 4739 maybe_func = False 4740 4741 if self._match(TokenType.L_PAREN): 4742 if is_struct: 4743 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4744 elif nested: 4745 expressions = self._parse_csv( 4746 lambda: self._parse_types( 4747 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4748 ) 4749 ) 4750 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4751 this = expressions[0] 4752 this.set("nullable", True) 4753 self._match_r_paren() 4754 return this 4755 elif type_token in self.ENUM_TYPE_TOKENS: 4756 
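# Illustrative note (not part of the original source): enum members parse as
# equalities, e.g. ClickHouse's "Enum8('a' = 1, 'b' = 2)" yields the EQ nodes
# 'a' = 1 and 'b' = 2 as the DataType's expressions.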
expressions = self._parse_csv(self._parse_equality) 4757 elif is_aggregate: 4758 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4759 any_token=False, tokens=(TokenType.VAR,) 4760 ) 4761 if not func_or_ident or not self._match(TokenType.COMMA): 4762 return None 4763 expressions = self._parse_csv( 4764 lambda: self._parse_types( 4765 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4766 ) 4767 ) 4768 expressions.insert(0, func_or_ident) 4769 else: 4770 expressions = self._parse_csv(self._parse_type_size) 4771 4772 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4773 if type_token == TokenType.VECTOR and len(expressions) == 2: 4774 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4775 4776 if not expressions or not self._match(TokenType.R_PAREN): 4777 self._retreat(index) 4778 return None 4779 4780 maybe_func = True 4781 4782 values: t.Optional[t.List[exp.Expression]] = None 4783 4784 if nested and self._match(TokenType.LT): 4785 if is_struct: 4786 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4787 else: 4788 expressions = self._parse_csv( 4789 lambda: self._parse_types( 4790 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4791 ) 4792 ) 4793 4794 if not self._match(TokenType.GT): 4795 self.raise_error("Expecting >") 4796 4797 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4798 values = self._parse_csv(self._parse_assignment) 4799 if not values and is_struct: 4800 values = None 4801 self._retreat(self._index - 1) 4802 else: 4803 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4804 4805 if type_token in self.TIMESTAMPS: 4806 if self._match_text_seq("WITH", "TIME", "ZONE"): 4807 maybe_func = False 4808 tz_type = ( 4809 exp.DataType.Type.TIMETZ 4810 if type_token in self.TIMES 4811 else exp.DataType.Type.TIMESTAMPTZ 4812 ) 4813 this = exp.DataType(this=tz_type, expressions=expressions) 4814 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4815 maybe_func = False 4816 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4817 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4818 maybe_func = False 4819 elif type_token == TokenType.INTERVAL: 4820 unit = self._parse_var(upper=True) 4821 if unit: 4822 if self._match_text_seq("TO"): 4823 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4824 4825 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4826 else: 4827 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4828 4829 if maybe_func and check_func: 4830 index2 = self._index 4831 peek = self._parse_string() 4832 4833 if not peek: 4834 self._retreat(index) 4835 return None 4836 4837 self._retreat(index2) 4838 4839 if not this: 4840 if self._match_text_seq("UNSIGNED"): 4841 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4842 if not unsigned_type_token: 4843 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4844 4845 type_token = unsigned_type_token or type_token 4846 4847 this = exp.DataType( 4848 this=exp.DataType.Type[type_token.value], 4849 expressions=expressions, 4850 nested=nested, 4851 prefix=prefix, 4852 ) 4853 4854 # Empty arrays/structs are allowed 4855 if values is not None: 4856 cls = exp.Struct if is_struct else exp.Array 4857 this = exp.cast(cls(expressions=values), this, copy=False) 4858 4859 elif expressions: 4860 this.set("expressions", 
expressions) 4861 4862 # https://materialize.com/docs/sql/types/list/#type-name 4863 while self._match(TokenType.LIST): 4864 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4865 4866 index = self._index 4867 4868 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4869 matched_array = self._match(TokenType.ARRAY) 4870 4871 while self._curr: 4872 datatype_token = self._prev.token_type 4873 matched_l_bracket = self._match(TokenType.L_BRACKET) 4874 if not matched_l_bracket and not matched_array: 4875 break 4876 4877 matched_array = False 4878 values = self._parse_csv(self._parse_assignment) or None 4879 if ( 4880 values 4881 and not schema 4882 and ( 4883 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4884 ) 4885 ): 4886 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4887 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4888 self._retreat(index) 4889 break 4890 4891 this = exp.DataType( 4892 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4893 ) 4894 self._match(TokenType.R_BRACKET) 4895 4896 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4897 converter = self.TYPE_CONVERTERS.get(this.this) 4898 if converter: 4899 this = converter(t.cast(exp.DataType, this)) 4900 4901 return this 4902 4903 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4904 index = self._index 4905 4906 if ( 4907 self._curr 4908 and self._next 4909 and self._curr.token_type in self.TYPE_TOKENS 4910 and self._next.token_type in self.TYPE_TOKENS 4911 ): 4912 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4913 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4914 this = self._parse_id_var() 4915 else: 4916 this = ( 4917 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4918 or self._parse_id_var() 4919 ) 4920 4921 self._match(TokenType.COLON) 4922 4923 if ( 4924 type_required 4925 and not isinstance(this, exp.DataType) 4926 and not self._match_set(self.TYPE_TOKENS, advance=False) 4927 ): 4928 self._retreat(index) 4929 return self._parse_types() 4930 4931 return self._parse_column_def(this) 4932 4933 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4934 if not self._match_text_seq("AT", "TIME", "ZONE"): 4935 return this 4936 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4937 4938 def _parse_column(self) -> t.Optional[exp.Expression]: 4939 this = self._parse_column_reference() 4940 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4941 4942 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4943 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4944 4945 return column 4946 4947 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4948 this = self._parse_field() 4949 if ( 4950 not this 4951 and self._match(TokenType.VALUES, advance=False) 4952 and self.VALUES_FOLLOWED_BY_PAREN 4953 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4954 ): 4955 this = self._parse_id_var() 4956 4957 if isinstance(this, exp.Identifier): 4958 # We bubble up comments from the Identifier to the Column 4959 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4960 4961 return this 4962 4963 def _parse_colon_as_variant_extract( 4964 self, this: t.Optional[exp.Expression] 4965 ) -> t.Optional[exp.Expression]: 4966 casts = [] 4967 json_path = [] 4968 escape = None 4969 4970 while self._match(TokenType.COLON): 4971 start_index = self._index 4972 4973 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4974 path = self._parse_column_ops( 4975 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4976 ) 4977 4978 # The cast :: operator has a lower precedence than the extraction operator :, so 4979 # we rearrange the AST appropriately to avoid casting the JSON path 4980 while isinstance(path, exp.Cast): 4981 casts.append(path.to) 4982 path = path.this 4983 4984 if casts: 4985 dcolon_offset = next( 4986 i 4987 for i, t in enumerate(self._tokens[start_index:]) 4988 if t.token_type == TokenType.DCOLON 4989 ) 4990 end_token = self._tokens[start_index + dcolon_offset - 1] 4991 else: 4992 end_token = self._prev 4993 4994 if path: 4995 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 4996 # it'll roundtrip to a string literal in GET_PATH 4997 if isinstance(path, exp.Identifier) and path.quoted: 4998 escape = True 4999 5000 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5001 5002 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5003 # Databricks transforms it back to the colon/dot notation 5004 if json_path: 5005 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5006 5007 if json_path_expr: 5008 json_path_expr.set("escape", escape) 5009 5010 this = self.expression( 5011 exp.JSONExtract, 5012 this=this, 5013 expression=json_path_expr, 5014 variant_extract=True, 5015 ) 5016 5017 while casts: 5018 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5019 5020 return this 5021 5022 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5023 return self._parse_types() 5024 5025 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5026 this = self._parse_bracket(this) 5027 5028 while self._match_set(self.COLUMN_OPERATORS): 5029 op_token = self._prev.token_type 5030 op = self.COLUMN_OPERATORS.get(op_token) 5031 5032 if op_token == TokenType.DCOLON: 5033 field = self._parse_dcolon() 5034 if not field: 5035 self.raise_error("Expected type") 5036 elif op and self._curr: 5037 field = self._parse_column_reference() 5038 else: 5039 field = self._parse_field(any_token=True, anonymous_func=True) 5040 5041 if isinstance(field, exp.Func) and this: 5042 # bigquery allows function calls like x.y.count(...) 5043 # SAFE.SUBSTR(...) 5044 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5045 this = exp.replace_tree( 5046 this, 5047 lambda n: ( 5048 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5049 if n.table 5050 else n.this 5051 ) 5052 if isinstance(n, exp.Column) 5053 else n, 5054 ) 5055 5056 if op: 5057 this = op(self, this, field) 5058 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5059 this = self.expression( 5060 exp.Column, 5061 this=field, 5062 table=this.this, 5063 db=this.args.get("table"), 5064 catalog=this.args.get("db"), 5065 ) 5066 else: 5067 this = self.expression(exp.Dot, this=this, expression=field) 5068 5069 this = self._parse_bracket(this) 5070 5071 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5072 5073 def _parse_primary(self) -> t.Optional[exp.Expression]: 5074 if self._match_set(self.PRIMARY_PARSERS): 5075 token_type = self._prev.token_type 5076 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5077 5078 if token_type == TokenType.STRING: 5079 expressions = [primary] 5080 while self._match(TokenType.STRING): 5081 expressions.append(exp.Literal.string(self._prev.text)) 5082 5083 if len(expressions) > 1: 5084 return self.expression(exp.Concat, expressions=expressions) 5085 5086 return primary 5087 5088 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5089 return exp.Literal.number(f"0.{self._prev.text}") 5090 5091 if self._match(TokenType.L_PAREN): 5092 comments = self._prev_comments 5093 query = self._parse_select() 5094 5095 if query: 5096 expressions = [query] 5097 else: 5098 expressions = self._parse_expressions() 5099 5100 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5101 5102 if not this and self._match(TokenType.R_PAREN, advance=False): 5103 this = self.expression(exp.Tuple) 5104 elif 
isinstance(this, exp.UNWRAPPED_QUERIES): 5105 this = self._parse_subquery(this=this, parse_alias=False) 5106 elif isinstance(this, exp.Subquery): 5107 this = self._parse_subquery( 5108 this=self._parse_set_operations(this), parse_alias=False 5109 ) 5110 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5111 this = self.expression(exp.Tuple, expressions=expressions) 5112 else: 5113 this = self.expression(exp.Paren, this=this) 5114 5115 if this: 5116 this.add_comments(comments) 5117 5118 self._match_r_paren(expression=this) 5119 return this 5120 5121 return None 5122 5123 def _parse_field( 5124 self, 5125 any_token: bool = False, 5126 tokens: t.Optional[t.Collection[TokenType]] = None, 5127 anonymous_func: bool = False, 5128 ) -> t.Optional[exp.Expression]: 5129 if anonymous_func: 5130 field = ( 5131 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5132 or self._parse_primary() 5133 ) 5134 else: 5135 field = self._parse_primary() or self._parse_function( 5136 anonymous=anonymous_func, any_token=any_token 5137 ) 5138 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5139 5140 def _parse_function( 5141 self, 5142 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5143 anonymous: bool = False, 5144 optional_parens: bool = True, 5145 any_token: bool = False, 5146 ) -> t.Optional[exp.Expression]: 5147 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5148 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5149 fn_syntax = False 5150 if ( 5151 self._match(TokenType.L_BRACE, advance=False) 5152 and self._next 5153 and self._next.text.upper() == "FN" 5154 ): 5155 self._advance(2) 5156 fn_syntax = True 5157 5158 func = self._parse_function_call( 5159 functions=functions, 5160 anonymous=anonymous, 5161 optional_parens=optional_parens, 5162 any_token=any_token, 5163 ) 5164 5165 if fn_syntax: 5166 self._match(TokenType.R_BRACE) 5167 5168 return func 5169 5170 def _parse_function_call( 5171 self, 5172 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5173 anonymous: bool = False, 5174 optional_parens: bool = True, 5175 any_token: bool = False, 5176 ) -> t.Optional[exp.Expression]: 5177 if not self._curr: 5178 return None 5179 5180 comments = self._curr.comments 5181 token_type = self._curr.token_type 5182 this = self._curr.text 5183 upper = this.upper() 5184 5185 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5186 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5187 self._advance() 5188 return self._parse_window(parser(self)) 5189 5190 if not self._next or self._next.token_type != TokenType.L_PAREN: 5191 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5192 self._advance() 5193 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5194 5195 return None 5196 5197 if any_token: 5198 if token_type in self.RESERVED_TOKENS: 5199 return None 5200 elif token_type not in self.FUNC_TOKENS: 5201 return None 5202 5203 self._advance(2) 5204 5205 parser = self.FUNCTION_PARSERS.get(upper) 5206 if parser and not anonymous: 5207 this = parser(self) 5208 else: 5209 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5210 5211 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5212 this = self.expression(subquery_predicate, this=self._parse_select()) 5213 self._match_r_paren() 5214 return this 5215 5216 if functions is None: 5217 functions = self.FUNCTIONS 5218 5219 function = functions.get(upper) 
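# Illustrative note (not part of the original source): `upper` is the
# upper-cased function name, so for "COALESCE(a, b)" this lookup returns the
# registered builder that constructs exp.Coalesce from the argument list parsed
# below, while unknown names fall through to exp.Anonymous.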

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]
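
        # Editor's note (illustrative sketch, not part of the original source): at
        # this point either one bare argument or a parenthesized list such as
        # "(x, y)" has been consumed. LAMBDAS maps arrow tokens like -> and => to
        # builders, so "x -> x + 1" (e.g. DuckDB, Databricks) becomes an exp.Lambda.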
        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
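
    # Editor's sketch (not part of the original source): T-SQL's IDENTITY(1, 1)
    # takes the L_PAREN branch above, yielding
    # exp.GeneratedAsIdentityColumnConstraint(start=1, increment=1), while a bare
    # AUTO_INCREMENT falls through to exp.AutoIncrementColumnConstraint().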
    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
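
    # Editor's sketch (not part of the original source): for a column spec such as
    # "id INT CONSTRAINT pk_id PRIMARY KEY", _parse_column_constraint returns an
    # exp.ColumnConstraint whose `this` is the identifier pk_id and whose `kind`
    # is produced by CONSTRAINT_PARSERS["PRIMARY KEY"].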
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
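
    # Editor's sketch (not part of the original source): a clause like
    # "FOREIGN KEY (user_id) REFERENCES users (id) ON DELETE CASCADE" is parsed
    # below into exp.ForeignKey(expressions=[user_id], reference=..., delete="CASCADE"),
    # since each "ON <kind> <action>" pair is collected into `options`.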
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
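
    # Editor's sketch (not part of the original source): per the docstring below,
    # an ODBC literal such as {d '2024-01-15'} is parsed into the same tree as
    # DATE('2024-01-15'); {t '...'} and {ts '...'} map to the time and timestamp
    # expression types registered in ODBC_DATETIME_LITERALS.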
    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
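
    # Editor's sketch (not part of the original source):
    #   CASE x WHEN 1 THEN 'a' ELSE 'b' END
    # is parsed above into exp.Case(this=x, ifs=[exp.If(this=1, true='a')], default='b').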
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
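
    # Editor's sketch (not part of the original source): a Teradata-style
    # CAST(x AS DATE FORMAT 'YYYY-MM-DD') takes the FORMAT branch above and is
    # rewritten to exp.StrToDate, with the format string translated through the
    # dialect's FORMAT_MAPPING / TIME_MAPPING tables.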
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
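
    # Editor's sketch (not part of the original source): MySQL's
    #   MATCH(title, body) AGAINST('database' IN NATURAL LANGUAGE MODE)
    # parses above into exp.MatchAgainst(this='database', expressions=[title, body],
    # modifier="IN NATURAL LANGUAGE MODE").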
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
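
    # Editor's sketch (not part of the original source): in
    #   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
    # _parse_window_clause consumes the trailing WINDOW clause, and each named
    # window is built by _parse_named_window via _parse_window(..., alias=True).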
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
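
    # Editor's sketch (not part of the original source): given
    #   SELECT x /* c */ AS y
    # the block above pops the comment off the column and re-attaches it to the
    # exp.Alias node, so the comment stays next to the alias when regenerating SQL.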
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
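
    # Editor's sketch (not part of the original source): PLACEHOLDER_PARSERS is what
    # turns parameter markers into nodes, e.g. the "?" in
    #   SELECT * FROM t WHERE id = ?
    # becomes an exp.Placeholder via _parse_placeholder above.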
    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
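
    # Editor's sketch (not part of the original source): a statement like
    #   ROLLBACK TO SAVEPOINT sp1
    # reaches _parse_commit_or_rollback with is_rollback=True and returns
    # exp.Rollback(savepoint=sp1), while COMMIT AND CHAIN yields exp.Commit(chain=True).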
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
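
    # Editor's sketch (not part of the original source): for
    #   ALTER TABLE t SET LOCATION 's3://bucket/path'
    # the LOCATION branch above sets alter_set's `location` arg; the other branches
    # cover tags, file formats, stage options, and similar SET variants.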

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._match(TokenType.RETURNING) and self._parse_csv(self._parse_bitwise),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
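
    # Editor's sketch (not part of the original source): a simple assignment such as
    #   SET x = 5
    # flows through _parse_set -> _parse_set_item -> _parse_set_item_assignment below,
    # producing exp.Set(expressions=[exp.SetItem(this=exp.EQ(this=x, expression=5))]).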
6909 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6910 return self._parse_set_transaction(global_=kind == "GLOBAL") 6911 6912 left = self._parse_primary() or self._parse_column() 6913 assignment_delimiter = self._match_texts(("=", "TO")) 6914 6915 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6916 self._retreat(index) 6917 return None 6918 6919 right = self._parse_statement() or self._parse_id_var() 6920 if isinstance(right, (exp.Column, exp.Identifier)): 6921 right = exp.var(right.name) 6922 6923 this = self.expression(exp.EQ, this=left, expression=right) 6924 return self.expression(exp.SetItem, this=this, kind=kind) 6925 6926 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6927 self._match_text_seq("TRANSACTION") 6928 characteristics = self._parse_csv( 6929 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6930 ) 6931 return self.expression( 6932 exp.SetItem, 6933 expressions=characteristics, 6934 kind="TRANSACTION", 6935 **{"global": global_}, # type: ignore 6936 ) 6937 6938 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6939 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6940 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6941 6942 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6943 index = self._index 6944 set_ = self.expression( 6945 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6946 ) 6947 6948 if self._curr: 6949 self._retreat(index) 6950 return self._parse_as_command(self._prev) 6951 6952 return set_ 6953 6954 def _parse_var_from_options( 6955 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6956 ) -> t.Optional[exp.Var]: 6957 start = self._curr 6958 if not start: 6959 return None 6960 6961 option = start.text.upper() 6962 continuations = options.get(option) 6963 6964 index = self._index 6965 self._advance() 6966 for keywords in continuations or []: 6967 if isinstance(keywords, str): 6968 keywords = (keywords,) 6969 6970 if self._match_text_seq(*keywords): 6971 option = f"{option} {' '.join(keywords)}" 6972 break 6973 else: 6974 if continuations or continuations is None: 6975 if raise_unmatched: 6976 self.raise_error(f"Unknown option {option}") 6977 6978 self._retreat(index) 6979 return None 6980 6981 return exp.var(option) 6982 6983 def _parse_as_command(self, start: Token) -> exp.Command: 6984 while self._curr: 6985 self._advance() 6986 text = self._find_sql(start, self._prev) 6987 size = len(start.text) 6988 self._warn_unsupported() 6989 return exp.Command(this=text[:size], expression=text[size:]) 6990 6991 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6992 settings = [] 6993 6994 self._match_l_paren() 6995 kind = self._parse_id_var() 6996 6997 if self._match(TokenType.L_PAREN): 6998 while True: 6999 key = self._parse_id_var() 7000 value = self._parse_primary() 7001 7002 if not key and value is None: 7003 break 7004 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7005 self._match(TokenType.R_PAREN) 7006 7007 self._match_r_paren() 7008 7009 return self.expression( 7010 exp.DictProperty, 7011 this=this, 7012 kind=kind.this if kind else None, 7013 settings=settings, 7014 ) 7015 7016 def _parse_dict_range(self, this: str) -> exp.DictRange: 7017 self._match_l_paren() 7018 has_min = self._match_text_seq("MIN") 7019 if has_min: 7020 min = self._parse_var() or self._parse_primary() 7021 
self._match_text_seq("MAX") 7022 max = self._parse_var() or self._parse_primary() 7023 else: 7024 max = self._parse_var() or self._parse_primary() 7025 min = exp.Literal.number(0) 7026 self._match_r_paren() 7027 return self.expression(exp.DictRange, this=this, min=min, max=max) 7028 7029 def _parse_comprehension( 7030 self, this: t.Optional[exp.Expression] 7031 ) -> t.Optional[exp.Comprehension]: 7032 index = self._index 7033 expression = self._parse_column() 7034 if not self._match(TokenType.IN): 7035 self._retreat(index - 1) 7036 return None 7037 iterator = self._parse_column() 7038 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7039 return self.expression( 7040 exp.Comprehension, 7041 this=this, 7042 expression=expression, 7043 iterator=iterator, 7044 condition=condition, 7045 ) 7046 7047 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7048 if self._match(TokenType.HEREDOC_STRING): 7049 return self.expression(exp.Heredoc, this=self._prev.text) 7050 7051 if not self._match_text_seq("$"): 7052 return None 7053 7054 tags = ["$"] 7055 tag_text = None 7056 7057 if self._is_connected(): 7058 self._advance() 7059 tags.append(self._prev.text.upper()) 7060 else: 7061 self.raise_error("No closing $ found") 7062 7063 if tags[-1] != "$": 7064 if self._is_connected() and self._match_text_seq("$"): 7065 tag_text = tags[-1] 7066 tags.append("$") 7067 else: 7068 self.raise_error("No closing $ found") 7069 7070 heredoc_start = self._curr 7071 7072 while self._curr: 7073 if self._match_text_seq(*tags, advance=False): 7074 this = self._find_sql(heredoc_start, self._prev) 7075 self._advance(len(tags)) 7076 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7077 7078 self._advance() 7079 7080 self.raise_error(f"No closing {''.join(tags)} found") 7081 return None 7082 7083 def _find_parser( 7084 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7085 ) -> t.Optional[t.Callable]: 7086 if not self._curr: 7087 return None 7088 7089 index = self._index 7090 this = [] 7091 while True: 7092 # The current token might be multiple words 7093 curr = self._curr.text.upper() 7094 key = curr.split(" ") 7095 this.append(curr) 7096 7097 self._advance() 7098 result, trie = in_trie(trie, key) 7099 if result == TrieResult.FAILED: 7100 break 7101 7102 if result == TrieResult.EXISTS: 7103 subparser = parsers[" ".join(this)] 7104 return subparser 7105 7106 self._retreat(index) 7107 return None 7108 7109 def _match(self, token_type, advance=True, expression=None): 7110 if not self._curr: 7111 return None 7112 7113 if self._curr.token_type == token_type: 7114 if advance: 7115 self._advance() 7116 self._add_comments(expression) 7117 return True 7118 7119 return None 7120 7121 def _match_set(self, types, advance=True): 7122 if not self._curr: 7123 return None 7124 7125 if self._curr.token_type in types: 7126 if advance: 7127 self._advance() 7128 return True 7129 7130 return None 7131 7132 def _match_pair(self, token_type_a, token_type_b, advance=True): 7133 if not self._curr or not self._next: 7134 return None 7135 7136 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7137 if advance: 7138 self._advance(2) 7139 return True 7140 7141 return None 7142 7143 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7144 if not self._match(TokenType.L_PAREN, expression=expression): 7145 self.raise_error("Expecting (") 7146 7147 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7148 if not 
self._match(TokenType.R_PAREN, expression=expression): 7149 self.raise_error("Expecting )") 7150 7151 def _match_texts(self, texts, advance=True): 7152 if ( 7153 self._curr 7154 and self._curr.token_type != TokenType.STRING 7155 and self._curr.text.upper() in texts 7156 ): 7157 if advance: 7158 self._advance() 7159 return True 7160 return None 7161 7162 def _match_text_seq(self, *texts, advance=True): 7163 index = self._index 7164 for text in texts: 7165 if ( 7166 self._curr 7167 and self._curr.token_type != TokenType.STRING 7168 and self._curr.text.upper() == text 7169 ): 7170 self._advance() 7171 else: 7172 self._retreat(index) 7173 return None 7174 7175 if not advance: 7176 self._retreat(index) 7177 7178 return True 7179 7180 def _replace_lambda( 7181 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7182 ) -> t.Optional[exp.Expression]: 7183 if not node: 7184 return node 7185 7186 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7187 7188 for column in node.find_all(exp.Column): 7189 typ = lambda_types.get(column.parts[0].name) 7190 if typ is not None: 7191 dot_or_id = column.to_dot() if column.table else column.this 7192 7193 if typ: 7194 dot_or_id = self.expression( 7195 exp.Cast, 7196 this=dot_or_id, 7197 to=typ, 7198 ) 7199 7200 parent = column.parent 7201 7202 while isinstance(parent, exp.Dot): 7203 if not isinstance(parent.parent, exp.Dot): 7204 parent.replace(dot_or_id) 7205 break 7206 parent = parent.parent 7207 else: 7208 if column is node: 7209 node = dot_or_id 7210 else: 7211 column.replace(dot_or_id) 7212 return node 7213 7214 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7215 start = self._prev 7216 7217 # Not to be confused with TRUNCATE(number, decimals) function call 7218 if self._match(TokenType.L_PAREN): 7219 self._retreat(self._index - 2) 7220 return self._parse_function() 7221 7222 # Clickhouse supports TRUNCATE DATABASE as well 7223 is_database = self._match(TokenType.DATABASE) 7224 7225 self._match(TokenType.TABLE) 7226 7227 exists = self._parse_exists(not_=False) 7228 7229 expressions = self._parse_csv( 7230 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7231 ) 7232 7233 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7234 7235 if self._match_text_seq("RESTART", "IDENTITY"): 7236 identity = "RESTART" 7237 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7238 identity = "CONTINUE" 7239 else: 7240 identity = None 7241 7242 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7243 option = self._prev.text 7244 else: 7245 option = None 7246 7247 partition = self._parse_partition() 7248 7249 # Fallback case 7250 if self._curr: 7251 return self._parse_as_command(start) 7252 7253 return self.expression( 7254 exp.TruncateTable, 7255 expressions=expressions, 7256 is_database=is_database, 7257 exists=exists, 7258 cluster=cluster, 7259 identity=identity, 7260 option=option, 7261 partition=partition, 7262 ) 7263 7264 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7265 this = self._parse_ordered(self._parse_opclass) 7266 7267 if not self._match(TokenType.WITH): 7268 return this 7269 7270 op = self._parse_var(any_token=True) 7271 7272 return self.expression(exp.WithOperator, this=this, op=op) 7273 7274 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7275 self._match(TokenType.EQ) 7276 self._match(TokenType.L_PAREN) 7277 7278 opts: t.List[t.Optional[exp.Expression]] = [] 7279 while 
self._curr and not self._match(TokenType.R_PAREN): 7280 if self._match_text_seq("FORMAT_NAME", "="): 7281 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7282 # so we parse it separately to use _parse_field() 7283 prop = self.expression( 7284 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7285 ) 7286 opts.append(prop) 7287 else: 7288 opts.append(self._parse_property()) 7289 7290 self._match(TokenType.COMMA) 7291 7292 return opts 7293 7294 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7295 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7296 7297 options = [] 7298 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7299 option = self._parse_var(any_token=True) 7300 prev = self._prev.text.upper() 7301 7302 # Different dialects might separate options and values by white space, "=" and "AS" 7303 self._match(TokenType.EQ) 7304 self._match(TokenType.ALIAS) 7305 7306 param = self.expression(exp.CopyParameter, this=option) 7307 7308 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7309 TokenType.L_PAREN, advance=False 7310 ): 7311 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7312 param.set("expressions", self._parse_wrapped_options()) 7313 elif prev == "FILE_FORMAT": 7314 # T-SQL's external file format case 7315 param.set("expression", self._parse_field()) 7316 else: 7317 param.set("expression", self._parse_unquoted_field()) 7318 7319 options.append(param) 7320 self._match(sep) 7321 7322 return options 7323 7324 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7325 expr = self.expression(exp.Credentials) 7326 7327 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7328 expr.set("storage", self._parse_field()) 7329 if self._match_text_seq("CREDENTIALS"): 7330 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7331 creds = ( 7332 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7333 ) 7334 expr.set("credentials", creds) 7335 if self._match_text_seq("ENCRYPTION"): 7336 expr.set("encryption", self._parse_wrapped_options()) 7337 if self._match_text_seq("IAM_ROLE"): 7338 expr.set("iam_role", self._parse_field()) 7339 if self._match_text_seq("REGION"): 7340 expr.set("region", self._parse_field()) 7341 7342 return expr 7343 7344 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7345 return self._parse_field() 7346 7347 def _parse_copy(self) -> exp.Copy | exp.Command: 7348 start = self._prev 7349 7350 self._match(TokenType.INTO) 7351 7352 this = ( 7353 self._parse_select(nested=True, parse_subquery_alias=False) 7354 if self._match(TokenType.L_PAREN, advance=False) 7355 else self._parse_table(schema=True) 7356 ) 7357 7358 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7359 7360 files = self._parse_csv(self._parse_file_location) 7361 credentials = self._parse_credentials() 7362 7363 self._match_text_seq("WITH") 7364 7365 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7366 7367 # Fallback case 7368 if self._curr: 7369 return self._parse_as_command(start) 7370 7371 return self.expression( 7372 exp.Copy, 7373 this=this, 7374 kind=kind, 7375 credentials=credentials, 7376 files=files, 7377 params=params, 7378 ) 7379 7380 def _parse_normalize(self) -> exp.Normalize: 7381 return self.expression( 7382 exp.Normalize, 7383 this=self._parse_bitwise(), 7384 form=self._match(TokenType.COMMA) and self._parse_var(), 7385 ) 7386 7387 def _parse_star_ops(self) -> 
exp.Star | exp.UnpackColumns: 7388 if self._match_text_seq("COLUMNS", "(", advance=False): 7389 return exp.UnpackColumns(this=self._parse_function()) 7390 7391 return self.expression( 7392 exp.Star, 7393 **{ # type: ignore 7394 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7395 "replace": self._parse_star_op("REPLACE"), 7396 "rename": self._parse_star_op("RENAME"), 7397 }, 7398 )
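The statement-level methods above are easiest to exercise through sqlglot's public API. The examples that follow are minimal sketches against a recent sqlglot; the table, column and option names are illustrative. Per _parse_alter, an ALTER against one of the ALTERABLES (INDEX, TABLE, VIEW) with a recognized action produces a structured exp.Alter node, while anything the grammar cannot fully consume falls back to _parse_as_command and is preserved as an opaque exp.Command:

    import sqlglot
    from sqlglot import exp

    # A supported action yields a structured node with the parsed pieces
    alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    assert isinstance(alter, exp.Alter)
    assert alter.args["kind"] == "TABLE" and alter.args["actions"]

    # SESSION is not in ALTERABLES, so the statement degrades to a raw Command
    # (and _warn_unsupported logs a warning)
    cmd = sqlglot.parse_one("ALTER SESSION SET x = 1")
    assert isinstance(cmd, exp.Command)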
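_parse_merge and _parse_when_matched split a MERGE statement into its target, source, join condition and one exp.When node per WHEN branch, which makes the branches easy to inspect or rewrite individually. A sketch, assuming the default dialect:

    import sqlglot
    from sqlglot import exp

    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)

    whens = merge.expressions  # one exp.When per branch, in source order
    assert whens[0].args["matched"] is True
    assert whens[1].args["matched"] is False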
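_parse_set feeds each comma-separated item through _parse_set_item, and _parse_set_item_assignment normalizes a name/value pair into an exp.SetItem wrapping an exp.EQ; a bare identifier on the right-hand side is converted to an exp.Var so it round-trips as a keyword rather than a column reference. If any token is left unconsumed, the whole statement is retried as an exp.Command. A sketch:

    import sqlglot
    from sqlglot import exp

    stmt = sqlglot.parse_one("SET search_path = public")
    item = stmt.expressions[0]      # exp.SetItem
    eq = item.this                  # exp.EQ: name on the left, value on the right
    assert isinstance(eq, exp.EQ)
    assert isinstance(eq.expression, exp.Var)  # "public" became a Var, not a Column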
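_parse_set_transaction recognizes the multi-word characteristics declared in TRANSACTION_CHARACTERISTICS via _parse_var_from_options, which greedily matches keyword continuations (retreating on failure) and joins the matched words into a single exp.Var. A sketch:

    import sqlglot
    from sqlglot import exp

    txn = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
    item = txn.expressions[0]
    assert item.args["kind"] == "TRANSACTION"
    assert item.expressions[0].name == "ISOLATION LEVEL READ COMMITTED"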
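_parse_truncate_table distinguishes the statement form from the TRUNCATE(number, decimals) function call by looking ahead for a parenthesis, then collects the optional IDENTITY behavior, CASCADE/RESTRICT keyword and partition clause, falling back to a Command if anything remains unparsed. A sketch:

    import sqlglot
    from sqlglot import exp

    trunc = sqlglot.parse_one("TRUNCATE TABLE t RESTART IDENTITY CASCADE")
    assert isinstance(trunc, exp.TruncateTable)
    assert trunc.args["identity"] == "RESTART"
    # The CASCADE/RESTRICT keyword is kept as raw text under trunc.args["option"]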
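_parse_copy normalizes COPY statements across dialects: the direction is stored under "kind" (True when loading FROM files), file locations under "files", and each option becomes an exp.CopyParameter, with parenthesized option groups such as Snowflake's FILE_FORMAT handled by _parse_wrapped_options. A sketch against the Snowflake dialect (the bucket path is illustrative):

    import sqlglot
    from sqlglot import exp

    copy = sqlglot.parse_one(
        "COPY INTO t FROM 's3://bucket/path' FILE_FORMAT = (TYPE = CSV)",
        read="snowflake",
    )
    assert isinstance(copy, exp.Copy)
    assert copy.args["kind"] is True      # FROM, i.e. loading into the table
    file_format = copy.args["params"][0]  # exp.CopyParameter for FILE_FORMAT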
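When a lambda such as x -> x + 1 is parsed, _replace_lambda rewrites references to the lambda parameters inside the body from exp.Column nodes into plain identifiers (adding an exp.Cast for typed parameters), so later passes do not mistake them for real columns. A sketch using DuckDB's list functions:

    import sqlglot
    from sqlglot import exp

    q = sqlglot.parse_one("SELECT list_transform([1, 2, 3], x -> x + 1)", read="duckdb")
    lam = q.find(exp.Lambda)

    # Inside the body, x is an exp.Identifier rather than an exp.Column
    assert lam.this.find(exp.Column) is None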
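The _match, _match_texts and _match_text_seq helpers above, together with _retreat, are the lookahead and backtracking primitives behind every parser table, and they are also the natural tools for dialect extensions: a Parser subclass can register new entries in tables like PROPERTY_PARSERS and consume tokens with the same helpers. A hypothetical sketch (FLUFFY is an invented property, not a real option in any dialect):

    from sqlglot import exp, parse_one
    from sqlglot.dialects.postgres import Postgres
    from sqlglot.tokens import TokenType

    class MyPostgres(Postgres):
        class Parser(Postgres.Parser):
            PROPERTY_PARSERS = {
                **Postgres.Parser.PROPERTY_PARSERS,
                # Expect an "=" followed by the property value, e.g. FLUFFY = 42
                "FLUFFY": lambda self: self.expression(
                    exp.Property,
                    this=exp.var("FLUFFY"),
                    value=self._match(TokenType.EQ) and self._parse_field(),
                ),
            }

    ast = parse_one("CREATE TABLE t (a INT) FLUFFY = 42", read=MyPostgres)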
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 } 265 266 NESTED_TYPE_TOKENS = { 267 TokenType.ARRAY, 268 TokenType.LIST, 269 TokenType.LOWCARDINALITY, 270 TokenType.MAP, 271 TokenType.NULLABLE, 272 *STRUCT_TYPE_TOKENS, 273 } 274 275 ENUM_TYPE_TOKENS = { 276 TokenType.ENUM, 277 TokenType.ENUM8, 278 TokenType.ENUM16, 279 } 280 281 AGGREGATE_TYPE_TOKENS = { 282 TokenType.AGGREGATEFUNCTION, 283 TokenType.SIMPLEAGGREGATEFUNCTION, 284 } 285 286 TYPE_TOKENS = { 287 TokenType.BIT, 288 TokenType.BOOLEAN, 289 TokenType.TINYINT, 290 TokenType.UTINYINT, 291 TokenType.SMALLINT, 292 TokenType.USMALLINT, 293 TokenType.INT, 294 TokenType.UINT, 295 TokenType.BIGINT, 296 TokenType.UBIGINT, 297 TokenType.INT128, 298 TokenType.UINT128, 299 TokenType.INT256, 300 TokenType.UINT256, 301 TokenType.MEDIUMINT, 302 TokenType.UMEDIUMINT, 303 TokenType.FIXEDSTRING, 304 TokenType.FLOAT, 305 TokenType.DOUBLE, 306 TokenType.CHAR, 307 TokenType.NCHAR, 308 TokenType.VARCHAR, 309 TokenType.NVARCHAR, 310 TokenType.BPCHAR, 311 TokenType.TEXT, 312 TokenType.MEDIUMTEXT, 313 TokenType.LONGTEXT, 314 TokenType.MEDIUMBLOB, 315 TokenType.LONGBLOB, 316 TokenType.BINARY, 317 TokenType.VARBINARY, 318 TokenType.JSON, 319 TokenType.JSONB, 320 TokenType.INTERVAL, 321 TokenType.TINYBLOB, 322 TokenType.TINYTEXT, 323 TokenType.TIME, 324 TokenType.TIMETZ, 325 TokenType.TIMESTAMP, 326 TokenType.TIMESTAMP_S, 327 TokenType.TIMESTAMP_MS, 328 TokenType.TIMESTAMP_NS, 329 TokenType.TIMESTAMPTZ, 330 TokenType.TIMESTAMPLTZ, 331 TokenType.TIMESTAMPNTZ, 332 TokenType.DATETIME, 333 TokenType.DATETIME64, 334 TokenType.DATE, 335 TokenType.DATE32, 336 TokenType.INT4RANGE, 337 TokenType.INT4MULTIRANGE, 338 TokenType.INT8RANGE, 339 TokenType.INT8MULTIRANGE, 340 TokenType.NUMRANGE, 341 TokenType.NUMMULTIRANGE, 342 TokenType.TSRANGE, 343 TokenType.TSMULTIRANGE, 344 TokenType.TSTZRANGE, 345 TokenType.TSTZMULTIRANGE, 346 TokenType.DATERANGE, 347 TokenType.DATEMULTIRANGE, 348 TokenType.DECIMAL, 349 TokenType.DECIMAL32, 350 TokenType.DECIMAL64, 351 TokenType.DECIMAL128, 352 TokenType.UDECIMAL, 353 TokenType.BIGDECIMAL, 354 TokenType.UUID, 355 TokenType.GEOGRAPHY, 356 TokenType.GEOMETRY, 357 TokenType.HLLSKETCH, 358 TokenType.HSTORE, 359 TokenType.PSEUDO_TYPE, 360 TokenType.SUPER, 361 TokenType.SERIAL, 362 TokenType.SMALLSERIAL, 363 TokenType.BIGSERIAL, 364 TokenType.XML, 365 TokenType.YEAR, 366 TokenType.UNIQUEIDENTIFIER, 367 TokenType.USERDEFINED, 368 TokenType.MONEY, 369 TokenType.SMALLMONEY, 370 TokenType.ROWVERSION, 371 TokenType.IMAGE, 372 TokenType.VARIANT, 373 TokenType.VECTOR, 374 TokenType.OBJECT, 375 TokenType.OBJECT_IDENTIFIER, 376 TokenType.INET, 377 TokenType.IPADDRESS, 378 TokenType.IPPREFIX, 379 TokenType.IPV4, 380 TokenType.IPV6, 381 TokenType.UNKNOWN, 382 TokenType.NULL, 383 TokenType.NAME, 384 TokenType.TDIGEST, 385 *ENUM_TYPE_TOKENS, 386 *NESTED_TYPE_TOKENS, 387 *AGGREGATE_TYPE_TOKENS, 388 } 389 390 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 391 TokenType.BIGINT: TokenType.UBIGINT, 392 TokenType.INT: TokenType.UINT, 393 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 394 TokenType.SMALLINT: 
TokenType.USMALLINT, 395 TokenType.TINYINT: TokenType.UTINYINT, 396 TokenType.DECIMAL: TokenType.UDECIMAL, 397 } 398 399 SUBQUERY_PREDICATES = { 400 TokenType.ANY: exp.Any, 401 TokenType.ALL: exp.All, 402 TokenType.EXISTS: exp.Exists, 403 TokenType.SOME: exp.Any, 404 } 405 406 RESERVED_TOKENS = { 407 *Tokenizer.SINGLE_TOKENS.values(), 408 TokenType.SELECT, 409 } - {TokenType.IDENTIFIER} 410 411 DB_CREATABLES = { 412 TokenType.DATABASE, 413 TokenType.DICTIONARY, 414 TokenType.MODEL, 415 TokenType.SCHEMA, 416 TokenType.SEQUENCE, 417 TokenType.STORAGE_INTEGRATION, 418 TokenType.TABLE, 419 TokenType.TAG, 420 TokenType.VIEW, 421 TokenType.WAREHOUSE, 422 TokenType.STREAMLIT, 423 } 424 425 CREATABLES = { 426 TokenType.COLUMN, 427 TokenType.CONSTRAINT, 428 TokenType.FOREIGN_KEY, 429 TokenType.FUNCTION, 430 TokenType.INDEX, 431 TokenType.PROCEDURE, 432 *DB_CREATABLES, 433 } 434 435 ALTERABLES = { 436 TokenType.INDEX, 437 TokenType.TABLE, 438 TokenType.VIEW, 439 } 440 441 # Tokens that can represent identifiers 442 ID_VAR_TOKENS = { 443 TokenType.ALL, 444 TokenType.VAR, 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASC, 448 TokenType.ASOF, 449 TokenType.AUTO_INCREMENT, 450 TokenType.BEGIN, 451 TokenType.BPCHAR, 452 TokenType.CACHE, 453 TokenType.CASE, 454 TokenType.COLLATE, 455 TokenType.COMMAND, 456 TokenType.COMMENT, 457 TokenType.COMMIT, 458 TokenType.CONSTRAINT, 459 TokenType.COPY, 460 TokenType.CUBE, 461 TokenType.DEFAULT, 462 TokenType.DELETE, 463 TokenType.DESC, 464 TokenType.DESCRIBE, 465 TokenType.DICTIONARY, 466 TokenType.DIV, 467 TokenType.END, 468 TokenType.EXECUTE, 469 TokenType.ESCAPE, 470 TokenType.FALSE, 471 TokenType.FIRST, 472 TokenType.FILTER, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.FULL, 476 TokenType.IDENTIFIER, 477 TokenType.IS, 478 TokenType.ISNULL, 479 TokenType.INTERVAL, 480 TokenType.KEEP, 481 TokenType.KILL, 482 TokenType.LEFT, 483 TokenType.LOAD, 484 TokenType.MERGE, 485 TokenType.NATURAL, 486 TokenType.NEXT, 487 TokenType.OFFSET, 488 TokenType.OPERATOR, 489 TokenType.ORDINALITY, 490 TokenType.OVERLAPS, 491 TokenType.OVERWRITE, 492 TokenType.PARTITION, 493 TokenType.PERCENT, 494 TokenType.PIVOT, 495 TokenType.PRAGMA, 496 TokenType.RANGE, 497 TokenType.RECURSIVE, 498 TokenType.REFERENCES, 499 TokenType.REFRESH, 500 TokenType.RENAME, 501 TokenType.REPLACE, 502 TokenType.RIGHT, 503 TokenType.ROLLUP, 504 TokenType.ROW, 505 TokenType.ROWS, 506 TokenType.SEMI, 507 TokenType.SET, 508 TokenType.SETTINGS, 509 TokenType.SHOW, 510 TokenType.TEMPORARY, 511 TokenType.TOP, 512 TokenType.TRUE, 513 TokenType.TRUNCATE, 514 TokenType.UNIQUE, 515 TokenType.UNNEST, 516 TokenType.UNPIVOT, 517 TokenType.UPDATE, 518 TokenType.USE, 519 TokenType.VOLATILE, 520 TokenType.WINDOW, 521 *CREATABLES, 522 *SUBQUERY_PREDICATES, 523 *TYPE_TOKENS, 524 *NO_PAREN_FUNCTIONS, 525 } 526 527 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 528 529 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 530 TokenType.ANTI, 531 TokenType.APPLY, 532 TokenType.ASOF, 533 TokenType.FULL, 534 TokenType.LEFT, 535 TokenType.LOCK, 536 TokenType.NATURAL, 537 TokenType.OFFSET, 538 TokenType.RIGHT, 539 TokenType.SEMI, 540 TokenType.WINDOW, 541 } 542 543 ALIAS_TOKENS = ID_VAR_TOKENS 544 545 ARRAY_CONSTRUCTORS = { 546 "ARRAY": exp.Array, 547 "LIST": exp.List, 548 } 549 550 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 551 552 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 553 554 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 555 556 FUNC_TOKENS = { 557 TokenType.COLLATE, 558 
TokenType.COMMAND, 559 TokenType.CURRENT_DATE, 560 TokenType.CURRENT_DATETIME, 561 TokenType.CURRENT_TIMESTAMP, 562 TokenType.CURRENT_TIME, 563 TokenType.CURRENT_USER, 564 TokenType.FILTER, 565 TokenType.FIRST, 566 TokenType.FORMAT, 567 TokenType.GLOB, 568 TokenType.IDENTIFIER, 569 TokenType.INDEX, 570 TokenType.ISNULL, 571 TokenType.ILIKE, 572 TokenType.INSERT, 573 TokenType.LIKE, 574 TokenType.MERGE, 575 TokenType.OFFSET, 576 TokenType.PRIMARY_KEY, 577 TokenType.RANGE, 578 TokenType.REPLACE, 579 TokenType.RLIKE, 580 TokenType.ROW, 581 TokenType.UNNEST, 582 TokenType.VAR, 583 TokenType.LEFT, 584 TokenType.RIGHT, 585 TokenType.SEQUENCE, 586 TokenType.DATE, 587 TokenType.DATETIME, 588 TokenType.TABLE, 589 TokenType.TIMESTAMP, 590 TokenType.TIMESTAMPTZ, 591 TokenType.TRUNCATE, 592 TokenType.WINDOW, 593 TokenType.XOR, 594 *TYPE_TOKENS, 595 *SUBQUERY_PREDICATES, 596 } 597 598 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 599 TokenType.AND: exp.And, 600 } 601 602 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.COLON_EQ: exp.PropertyEQ, 604 } 605 606 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 607 TokenType.OR: exp.Or, 608 } 609 610 EQUALITY = { 611 TokenType.EQ: exp.EQ, 612 TokenType.NEQ: exp.NEQ, 613 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 614 } 615 616 COMPARISON = { 617 TokenType.GT: exp.GT, 618 TokenType.GTE: exp.GTE, 619 TokenType.LT: exp.LT, 620 TokenType.LTE: exp.LTE, 621 } 622 623 BITWISE = { 624 TokenType.AMP: exp.BitwiseAnd, 625 TokenType.CARET: exp.BitwiseXor, 626 TokenType.PIPE: exp.BitwiseOr, 627 } 628 629 TERM = { 630 TokenType.DASH: exp.Sub, 631 TokenType.PLUS: exp.Add, 632 TokenType.MOD: exp.Mod, 633 TokenType.COLLATE: exp.Collate, 634 } 635 636 FACTOR = { 637 TokenType.DIV: exp.IntDiv, 638 TokenType.LR_ARROW: exp.Distance, 639 TokenType.SLASH: exp.Div, 640 TokenType.STAR: exp.Mul, 641 } 642 643 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 644 645 TIMES = { 646 TokenType.TIME, 647 TokenType.TIMETZ, 648 } 649 650 TIMESTAMPS = { 651 TokenType.TIMESTAMP, 652 TokenType.TIMESTAMPTZ, 653 TokenType.TIMESTAMPLTZ, 654 *TIMES, 655 } 656 657 SET_OPERATIONS = { 658 TokenType.UNION, 659 TokenType.INTERSECT, 660 TokenType.EXCEPT, 661 } 662 663 JOIN_METHODS = { 664 TokenType.ASOF, 665 TokenType.NATURAL, 666 TokenType.POSITIONAL, 667 } 668 669 JOIN_SIDES = { 670 TokenType.LEFT, 671 TokenType.RIGHT, 672 TokenType.FULL, 673 } 674 675 JOIN_KINDS = { 676 TokenType.ANTI, 677 TokenType.CROSS, 678 TokenType.INNER, 679 TokenType.OUTER, 680 TokenType.SEMI, 681 TokenType.STRAIGHT_JOIN, 682 } 683 684 JOIN_HINTS: t.Set[str] = set() 685 686 LAMBDAS = { 687 TokenType.ARROW: lambda self, expressions: self.expression( 688 exp.Lambda, 689 this=self._replace_lambda( 690 self._parse_assignment(), 691 expressions, 692 ), 693 expressions=expressions, 694 ), 695 TokenType.FARROW: lambda self, expressions: self.expression( 696 exp.Kwarg, 697 this=exp.var(expressions[0].name), 698 expression=self._parse_assignment(), 699 ), 700 } 701 702 COLUMN_OPERATORS = { 703 TokenType.DOT: None, 704 TokenType.DCOLON: lambda self, this, to: self.expression( 705 exp.Cast if self.STRICT_CAST else exp.TryCast, 706 this=this, 707 to=to, 708 ), 709 TokenType.ARROW: lambda self, this, path: self.expression( 710 exp.JSONExtract, 711 this=this, 712 expression=self.dialect.to_json_path(path), 713 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 714 ), 715 TokenType.DARROW: lambda self, this, path: self.expression( 716 exp.JSONExtractScalar, 717 this=this, 718 
expression=self.dialect.to_json_path(path), 719 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 720 ), 721 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 722 exp.JSONBExtract, 723 this=this, 724 expression=path, 725 ), 726 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 727 exp.JSONBExtractScalar, 728 this=this, 729 expression=path, 730 ), 731 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 732 exp.JSONBContains, 733 this=this, 734 expression=key, 735 ), 736 } 737 738 EXPRESSION_PARSERS = { 739 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 740 exp.Column: lambda self: self._parse_column(), 741 exp.Condition: lambda self: self._parse_assignment(), 742 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 743 exp.Expression: lambda self: self._parse_expression(), 744 exp.From: lambda self: self._parse_from(joins=True), 745 exp.Group: lambda self: self._parse_group(), 746 exp.Having: lambda self: self._parse_having(), 747 exp.Identifier: lambda self: self._parse_id_var(), 748 exp.Join: lambda self: self._parse_join(), 749 exp.Lambda: lambda self: self._parse_lambda(), 750 exp.Lateral: lambda self: self._parse_lateral(), 751 exp.Limit: lambda self: self._parse_limit(), 752 exp.Offset: lambda self: self._parse_offset(), 753 exp.Order: lambda self: self._parse_order(), 754 exp.Ordered: lambda self: self._parse_ordered(), 755 exp.Properties: lambda self: self._parse_properties(), 756 exp.Qualify: lambda self: self._parse_qualify(), 757 exp.Returning: lambda self: self._parse_returning(), 758 exp.Select: lambda self: self._parse_select(), 759 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 760 exp.Table: lambda self: self._parse_table_parts(), 761 exp.TableAlias: lambda self: self._parse_table_alias(), 762 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 763 exp.Where: lambda self: self._parse_where(), 764 exp.Window: lambda self: self._parse_named_window(), 765 exp.With: lambda self: self._parse_with(), 766 "JOIN_TYPE": lambda self: self._parse_join_parts(), 767 } 768 769 STATEMENT_PARSERS = { 770 TokenType.ALTER: lambda self: self._parse_alter(), 771 TokenType.BEGIN: lambda self: self._parse_transaction(), 772 TokenType.CACHE: lambda self: self._parse_cache(), 773 TokenType.COMMENT: lambda self: self._parse_comment(), 774 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 775 TokenType.COPY: lambda self: self._parse_copy(), 776 TokenType.CREATE: lambda self: self._parse_create(), 777 TokenType.DELETE: lambda self: self._parse_delete(), 778 TokenType.DESC: lambda self: self._parse_describe(), 779 TokenType.DESCRIBE: lambda self: self._parse_describe(), 780 TokenType.DROP: lambda self: self._parse_drop(), 781 TokenType.INSERT: lambda self: self._parse_insert(), 782 TokenType.KILL: lambda self: self._parse_kill(), 783 TokenType.LOAD: lambda self: self._parse_load(), 784 TokenType.MERGE: lambda self: self._parse_merge(), 785 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 786 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 787 TokenType.REFRESH: lambda self: self._parse_refresh(), 788 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 789 TokenType.SET: lambda self: self._parse_set(), 790 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 791 TokenType.UNCACHE: lambda self: self._parse_uncache(), 792 TokenType.UPDATE: lambda self: self._parse_update(), 
793 TokenType.USE: lambda self: self.expression( 794 exp.Use, 795 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 796 this=self._parse_table(schema=False), 797 ), 798 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 799 } 800 801 UNARY_PARSERS = { 802 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 803 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 804 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 805 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 806 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 807 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 808 } 809 810 STRING_PARSERS = { 811 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 812 exp.RawString, this=token.text 813 ), 814 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 815 exp.National, this=token.text 816 ), 817 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 818 TokenType.STRING: lambda self, token: self.expression( 819 exp.Literal, this=token.text, is_string=True 820 ), 821 TokenType.UNICODE_STRING: lambda self, token: self.expression( 822 exp.UnicodeString, 823 this=token.text, 824 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 825 ), 826 } 827 828 NUMERIC_PARSERS = { 829 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 830 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 831 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 832 TokenType.NUMBER: lambda self, token: self.expression( 833 exp.Literal, this=token.text, is_string=False 834 ), 835 } 836 837 PRIMARY_PARSERS = { 838 **STRING_PARSERS, 839 **NUMERIC_PARSERS, 840 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 841 TokenType.NULL: lambda self, _: self.expression(exp.Null), 842 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 843 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 844 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 845 TokenType.STAR: lambda self, _: self._parse_star_ops(), 846 } 847 848 PLACEHOLDER_PARSERS = { 849 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 850 TokenType.PARAMETER: lambda self: self._parse_parameter(), 851 TokenType.COLON: lambda self: ( 852 self.expression(exp.Placeholder, this=self._prev.text) 853 if self._match_set(self.ID_VAR_TOKENS) 854 else None 855 ), 856 } 857 858 RANGE_PARSERS = { 859 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 860 TokenType.GLOB: binary_range_parser(exp.Glob), 861 TokenType.ILIKE: binary_range_parser(exp.ILike), 862 TokenType.IN: lambda self, this: self._parse_in(this), 863 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 864 TokenType.IS: lambda self, this: self._parse_is(this), 865 TokenType.LIKE: binary_range_parser(exp.Like), 866 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 867 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 868 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 869 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 870 } 871 872 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 873 "ALLOWED_VALUES": lambda self: 
self.expression( 874 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 875 ), 876 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 877 "AUTO": lambda self: self._parse_auto_property(), 878 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 879 "BACKUP": lambda self: self.expression( 880 exp.BackupProperty, this=self._parse_var(any_token=True) 881 ), 882 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 883 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 884 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 885 "CHECKSUM": lambda self: self._parse_checksum(), 886 "CLUSTER BY": lambda self: self._parse_cluster(), 887 "CLUSTERED": lambda self: self._parse_clustered_by(), 888 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 889 exp.CollateProperty, **kwargs 890 ), 891 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 892 "CONTAINS": lambda self: self._parse_contains_property(), 893 "COPY": lambda self: self._parse_copy_property(), 894 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 895 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 896 "DEFINER": lambda self: self._parse_definer(), 897 "DETERMINISTIC": lambda self: self.expression( 898 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 899 ), 900 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 901 "DUPLICATE": lambda self: self._parse_duplicate(), 902 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 903 "DISTKEY": lambda self: self._parse_distkey(), 904 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 905 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 906 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 907 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 908 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 909 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 910 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 911 "FREESPACE": lambda self: self._parse_freespace(), 912 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 913 "HEAP": lambda self: self.expression(exp.HeapProperty), 914 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 915 "IMMUTABLE": lambda self: self.expression( 916 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 917 ), 918 "INHERITS": lambda self: self.expression( 919 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 920 ), 921 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 922 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 923 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 924 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 925 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 926 "LIKE": lambda self: self._parse_create_like(), 927 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 928 "LOCK": lambda self: self._parse_locking(), 929 "LOCKING": lambda self: self._parse_locking(), 930 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 931 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 932 "MERGEBLOCKRATIO": lambda self, 
**kwargs: self._parse_mergeblockratio(**kwargs), 933 "MODIFIES": lambda self: self._parse_modifies_property(), 934 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 935 "NO": lambda self: self._parse_no_property(), 936 "ON": lambda self: self._parse_on_property(), 937 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 938 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 939 "PARTITION": lambda self: self._parse_partitioned_of(), 940 "PARTITION BY": lambda self: self._parse_partitioned_by(), 941 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 942 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 943 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 944 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 945 "READS": lambda self: self._parse_reads_property(), 946 "REMOTE": lambda self: self._parse_remote_with_connection(), 947 "RETURNS": lambda self: self._parse_returns(), 948 "STRICT": lambda self: self.expression(exp.StrictProperty), 949 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 950 "ROW": lambda self: self._parse_row(), 951 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 952 "SAMPLE": lambda self: self.expression( 953 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 954 ), 955 "SECURE": lambda self: self.expression(exp.SecureProperty), 956 "SECURITY": lambda self: self._parse_security(), 957 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 958 "SETTINGS": lambda self: self._parse_settings_property(), 959 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 960 "SORTKEY": lambda self: self._parse_sortkey(), 961 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 962 "STABLE": lambda self: self.expression( 963 exp.StabilityProperty, this=exp.Literal.string("STABLE") 964 ), 965 "STORED": lambda self: self._parse_stored(), 966 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 967 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 968 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 969 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 970 "TO": lambda self: self._parse_to_table(), 971 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 972 "TRANSFORM": lambda self: self.expression( 973 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 974 ), 975 "TTL": lambda self: self._parse_ttl(), 976 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 977 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 978 "VOLATILE": lambda self: self._parse_volatile_property(), 979 "WITH": lambda self: self._parse_with_property(), 980 } 981 982 CONSTRAINT_PARSERS = { 983 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 984 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 985 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 986 "CHARACTER SET": lambda self: self.expression( 987 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 988 ), 989 "CHECK": lambda self: self.expression( 990 exp.CheckColumnConstraint, 991 this=self._parse_wrapped(self._parse_assignment), 992 enforced=self._match_text_seq("ENFORCED"), 993 ), 994 "COLLATE": lambda self: self.expression( 995 exp.CollateColumnConstraint, 996 
this=self._parse_identifier() or self._parse_column(), 997 ), 998 "COMMENT": lambda self: self.expression( 999 exp.CommentColumnConstraint, this=self._parse_string() 1000 ), 1001 "COMPRESS": lambda self: self._parse_compress(), 1002 "CLUSTERED": lambda self: self.expression( 1003 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1004 ), 1005 "NONCLUSTERED": lambda self: self.expression( 1006 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1007 ), 1008 "DEFAULT": lambda self: self.expression( 1009 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1010 ), 1011 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1012 "EPHEMERAL": lambda self: self.expression( 1013 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1014 ), 1015 "EXCLUDE": lambda self: self.expression( 1016 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1017 ), 1018 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1019 "FORMAT": lambda self: self.expression( 1020 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1021 ), 1022 "GENERATED": lambda self: self._parse_generated_as_identity(), 1023 "IDENTITY": lambda self: self._parse_auto_increment(), 1024 "INLINE": lambda self: self._parse_inline(), 1025 "LIKE": lambda self: self._parse_create_like(), 1026 "NOT": lambda self: self._parse_not_constraint(), 1027 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1028 "ON": lambda self: ( 1029 self._match(TokenType.UPDATE) 1030 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1031 ) 1032 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1033 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1034 "PERIOD": lambda self: self._parse_period_for_system_time(), 1035 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1036 "REFERENCES": lambda self: self._parse_references(match=False), 1037 "TITLE": lambda self: self.expression( 1038 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1039 ), 1040 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1041 "UNIQUE": lambda self: self._parse_unique(), 1042 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1043 "WITH": lambda self: self.expression( 1044 exp.Properties, expressions=self._parse_wrapped_properties() 1045 ), 1046 } 1047 1048 ALTER_PARSERS = { 1049 "ADD": lambda self: self._parse_alter_table_add(), 1050 "ALTER": lambda self: self._parse_alter_table_alter(), 1051 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1052 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1053 "DROP": lambda self: self._parse_alter_table_drop(), 1054 "RENAME": lambda self: self._parse_alter_table_rename(), 1055 "SET": lambda self: self._parse_alter_table_set(), 1056 "AS": lambda self: self._parse_select(), 1057 } 1058 1059 ALTER_ALTER_PARSERS = { 1060 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1061 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1062 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1063 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1064 } 1065 1066 SCHEMA_UNNAMED_CONSTRAINTS = { 1067 "CHECK", 1068 "EXCLUDE", 1069 "FOREIGN KEY", 1070 "LIKE", 1071 "PERIOD", 1072 "PRIMARY KEY", 1073 "UNIQUE", 1074 } 1075 1076 NO_PAREN_FUNCTION_PARSERS = { 1077 "ANY": lambda self: 
self.expression(exp.Any, this=self._parse_bitwise()), 1078 "CASE": lambda self: self._parse_case(), 1079 "CONNECT_BY_ROOT": lambda self: self.expression( 1080 exp.ConnectByRoot, this=self._parse_column() 1081 ), 1082 "IF": lambda self: self._parse_if(), 1083 "NEXT": lambda self: self._parse_next_value_for(), 1084 } 1085 1086 INVALID_FUNC_NAME_TOKENS = { 1087 TokenType.IDENTIFIER, 1088 TokenType.STRING, 1089 } 1090 1091 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1092 1093 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1094 1095 FUNCTION_PARSERS = { 1096 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1097 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1098 "DECODE": lambda self: self._parse_decode(), 1099 "EXTRACT": lambda self: self._parse_extract(), 1100 "GAP_FILL": lambda self: self._parse_gap_fill(), 1101 "JSON_OBJECT": lambda self: self._parse_json_object(), 1102 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1103 "JSON_TABLE": lambda self: self._parse_json_table(), 1104 "MATCH": lambda self: self._parse_match_against(), 1105 "NORMALIZE": lambda self: self._parse_normalize(), 1106 "OPENJSON": lambda self: self._parse_open_json(), 1107 "POSITION": lambda self: self._parse_position(), 1108 "PREDICT": lambda self: self._parse_predict(), 1109 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1110 "STRING_AGG": lambda self: self._parse_string_agg(), 1111 "SUBSTRING": lambda self: self._parse_substring(), 1112 "TRIM": lambda self: self._parse_trim(), 1113 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1114 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1115 } 1116 1117 QUERY_MODIFIER_PARSERS = { 1118 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1119 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1120 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1121 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1122 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1123 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1124 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1125 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1126 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1127 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1128 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1129 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1130 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1131 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1132 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1133 TokenType.CLUSTER_BY: lambda self: ( 1134 "cluster", 1135 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1136 ), 1137 TokenType.DISTRIBUTE_BY: lambda self: ( 1138 "distribute", 1139 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1140 ), 1141 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1142 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1143 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1144 } 1145 1146 SET_PARSERS = { 1147 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1148 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1149 
"SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1150 "TRANSACTION": lambda self: self._parse_set_transaction(), 1151 } 1152 1153 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1154 1155 TYPE_LITERAL_PARSERS = { 1156 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1157 } 1158 1159 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1160 1161 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1162 1163 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1164 1165 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1166 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1167 "ISOLATION": ( 1168 ("LEVEL", "REPEATABLE", "READ"), 1169 ("LEVEL", "READ", "COMMITTED"), 1170 ("LEVEL", "READ", "UNCOMITTED"), 1171 ("LEVEL", "SERIALIZABLE"), 1172 ), 1173 "READ": ("WRITE", "ONLY"), 1174 } 1175 1176 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1177 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1178 ) 1179 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1180 1181 CREATE_SEQUENCE: OPTIONS_TYPE = { 1182 "SCALE": ("EXTEND", "NOEXTEND"), 1183 "SHARD": ("EXTEND", "NOEXTEND"), 1184 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1185 **dict.fromkeys( 1186 ( 1187 "SESSION", 1188 "GLOBAL", 1189 "KEEP", 1190 "NOKEEP", 1191 "ORDER", 1192 "NOORDER", 1193 "NOCACHE", 1194 "CYCLE", 1195 "NOCYCLE", 1196 "NOMINVALUE", 1197 "NOMAXVALUE", 1198 "NOSCALE", 1199 "NOSHARD", 1200 ), 1201 tuple(), 1202 ), 1203 } 1204 1205 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1206 1207 USABLES: OPTIONS_TYPE = dict.fromkeys( 1208 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1209 ) 1210 1211 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1212 1213 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1214 "TYPE": ("EVOLUTION",), 1215 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1216 } 1217 1218 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1219 "NOT": ("ENFORCED",), 1220 "MATCH": ( 1221 "FULL", 1222 "PARTIAL", 1223 "SIMPLE", 1224 ), 1225 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1226 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1227 } 1228 1229 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1230 1231 CLONE_KEYWORDS = {"CLONE", "COPY"} 1232 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1233 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1234 1235 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1236 1237 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1238 1239 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1240 1241 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1242 1243 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1244 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1245 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1246 1247 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1248 1249 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1250 1251 ADD_CONSTRAINT_TOKENS = { 1252 TokenType.CONSTRAINT, 1253 TokenType.FOREIGN_KEY, 1254 TokenType.INDEX, 1255 TokenType.KEY, 1256 TokenType.PRIMARY_KEY, 1257 TokenType.UNIQUE, 1258 } 1259 1260 DISTINCT_TOKENS = {TokenType.DISTINCT} 1261 1262 NULL_TOKENS = {TokenType.NULL} 1263 1264 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1265 1266 
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1267 1268 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1269 1270 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1271 1272 ODBC_DATETIME_LITERALS = { 1273 "d": exp.Date, 1274 "t": exp.Time, 1275 "ts": exp.Timestamp, 1276 } 1277 1278 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1279 1280 STRICT_CAST = True 1281 1282 PREFIXED_PIVOT_COLUMNS = False 1283 IDENTIFY_PIVOT_STRINGS = False 1284 1285 LOG_DEFAULTS_TO_LN = False 1286 1287 # Whether ADD is present for each column added by ALTER TABLE 1288 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1289 1290 # Whether the table sample clause expects CSV syntax 1291 TABLESAMPLE_CSV = False 1292 1293 # The default method used for table sampling 1294 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1295 1296 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1297 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1298 1299 # Whether the TRIM function expects the characters to trim as its first argument 1300 TRIM_PATTERN_FIRST = False 1301 1302 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1303 STRING_ALIASES = False 1304 1305 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1306 MODIFIERS_ATTACHED_TO_SET_OP = True 1307 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1308 1309 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1310 NO_PAREN_IF_COMMANDS = True 1311 1312 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1313 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1314 1315 # Whether the `:` operator is used to extract a value from a VARIANT column 1316 COLON_IS_VARIANT_EXTRACT = False 1317 1318 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1319 # If this is True and '(' is not found, the keyword will be treated as an identifier 1320 VALUES_FOLLOWED_BY_PAREN = True 1321 1322 # Whether implicit unnesting is supported, e.g. 
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
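    # Illustrative usage sketch (not part of the original source): parse() consumes tokens
    # produced by a Tokenizer, returning one tree per statement. Assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.parser import Parser
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> [e.sql() for e in Parser().parse(sqlglot.tokenize(sql), sql=sql)]
    #     ['SELECT 1', 'SELECT 2']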
1411 """ 1412 errors = [] 1413 for expression_type in ensure_list(expression_types): 1414 parser = self.EXPRESSION_PARSERS.get(expression_type) 1415 if not parser: 1416 raise TypeError(f"No parser registered for {expression_type}") 1417 1418 try: 1419 return self._parse(parser, raw_tokens, sql) 1420 except ParseError as e: 1421 e.errors[0]["into_expression"] = expression_type 1422 errors.append(e) 1423 1424 raise ParseError( 1425 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1426 errors=merge_errors(errors), 1427 ) from errors[-1] 1428 1429 def _parse( 1430 self, 1431 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1432 raw_tokens: t.List[Token], 1433 sql: t.Optional[str] = None, 1434 ) -> t.List[t.Optional[exp.Expression]]: 1435 self.reset() 1436 self.sql = sql or "" 1437 1438 total = len(raw_tokens) 1439 chunks: t.List[t.List[Token]] = [[]] 1440 1441 for i, token in enumerate(raw_tokens): 1442 if token.token_type == TokenType.SEMICOLON: 1443 if token.comments: 1444 chunks.append([token]) 1445 1446 if i < total - 1: 1447 chunks.append([]) 1448 else: 1449 chunks[-1].append(token) 1450 1451 expressions = [] 1452 1453 for tokens in chunks: 1454 self._index = -1 1455 self._tokens = tokens 1456 self._advance() 1457 1458 expressions.append(parse_method(self)) 1459 1460 if self._index < len(self._tokens): 1461 self.raise_error("Invalid expression / Unexpected token") 1462 1463 self.check_errors() 1464 1465 return expressions 1466 1467 def check_errors(self) -> None: 1468 """Logs or raises any found errors, depending on the chosen error level setting.""" 1469 if self.error_level == ErrorLevel.WARN: 1470 for error in self.errors: 1471 logger.error(str(error)) 1472 elif self.error_level == ErrorLevel.RAISE and self.errors: 1473 raise ParseError( 1474 concat_messages(self.errors, self.max_errors), 1475 errors=merge_errors(self.errors), 1476 ) 1477 1478 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1479 """ 1480 Appends an error in the list of recorded errors or raises it, depending on the chosen 1481 error level setting. 1482 """ 1483 token = token or self._curr or self._prev or Token.string("") 1484 start = token.start 1485 end = token.end + 1 1486 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1487 highlight = self.sql[start:end] 1488 end_context = self.sql[end : end + self.error_message_context] 1489 1490 error = ParseError.new( 1491 f"{message}. Line {token.line}, Col: {token.col}.\n" 1492 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1493 description=message, 1494 line=token.line, 1495 col=token.col, 1496 start_context=start_context, 1497 highlight=highlight, 1498 end_context=end_context, 1499 ) 1500 1501 if self.error_level == ErrorLevel.IMMEDIATE: 1502 raise error 1503 1504 self.errors.append(error) 1505 1506 def expression( 1507 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1508 ) -> E: 1509 """ 1510 Creates a new, validated Expression. 1511 1512 Args: 1513 exp_class: The expression class to instantiate. 1514 comments: An optional list of comments to attach to the expression. 1515 kwargs: The arguments to set for the expression along with their respective values. 1516 1517 Returns: 1518 The target expression. 
1519 """ 1520 instance = exp_class(**kwargs) 1521 instance.add_comments(comments) if comments else self._add_comments(instance) 1522 return self.validate_expression(instance) 1523 1524 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1525 if expression and self._prev_comments: 1526 expression.add_comments(self._prev_comments) 1527 self._prev_comments = None 1528 1529 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1530 """ 1531 Validates an Expression, making sure that all its mandatory arguments are set. 1532 1533 Args: 1534 expression: The expression to validate. 1535 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1536 1537 Returns: 1538 The validated expression. 1539 """ 1540 if self.error_level != ErrorLevel.IGNORE: 1541 for error_message in expression.error_messages(args): 1542 self.raise_error(error_message) 1543 1544 return expression 1545 1546 def _find_sql(self, start: Token, end: Token) -> str: 1547 return self.sql[start.start : end.end + 1] 1548 1549 def _is_connected(self) -> bool: 1550 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1551 1552 def _advance(self, times: int = 1) -> None: 1553 self._index += times 1554 self._curr = seq_get(self._tokens, self._index) 1555 self._next = seq_get(self._tokens, self._index + 1) 1556 1557 if self._index > 0: 1558 self._prev = self._tokens[self._index - 1] 1559 self._prev_comments = self._prev.comments 1560 else: 1561 self._prev = None 1562 self._prev_comments = None 1563 1564 def _retreat(self, index: int) -> None: 1565 if index != self._index: 1566 self._advance(index - self._index) 1567 1568 def _warn_unsupported(self) -> None: 1569 if len(self._tokens) <= 1: 1570 return 1571 1572 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1573 # interested in emitting a warning for the one being currently processed. 1574 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1575 1576 logger.warning( 1577 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1578 ) 1579 1580 def _parse_command(self) -> exp.Command: 1581 self._warn_unsupported() 1582 return self.expression( 1583 exp.Command, 1584 comments=self._prev_comments, 1585 this=self._prev.text.upper(), 1586 expression=self._parse_string(), 1587 ) 1588 1589 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1590 """ 1591 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
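    # Illustrative note (not part of the original source): _parse_statement is the top-level
    # dispatcher. Statements whose leading token appears in STATEMENT_PARSERS (CREATE, DROP,
    # INSERT, ...) are routed to the matching callback; tokens registered as dialect COMMANDS
    # fall back to _parse_command; anything else is treated as an expression or a SELECT.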
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
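    # Illustrative sketch (not part of the original source) of the AST _parse_drop builds,
    # assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
    #     >>> drop.args.get("exists"), drop.args.get("cascade")
    #     (True, True)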
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
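    # Illustrative sketch (not part of the original source) of _parse_create's output,
    # assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1 AS c")
    #     >>> create.args["kind"], create.args.get("replace")
    #     ('VIEW', True)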
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")
    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
self._match_text_seq("NEVER") 2316 default = self._match_text_seq("DEFAULT") 2317 2318 autotemp = None 2319 if self._match_text_seq("AUTOTEMP"): 2320 autotemp = self._parse_schema() 2321 2322 return self.expression( 2323 exp.BlockCompressionProperty, 2324 always=always, 2325 manual=manual, 2326 never=never, 2327 default=default, 2328 autotemp=autotemp, 2329 ) 2330 2331 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2332 index = self._index 2333 no = self._match_text_seq("NO") 2334 concurrent = self._match_text_seq("CONCURRENT") 2335 2336 if not self._match_text_seq("ISOLATED", "LOADING"): 2337 self._retreat(index) 2338 return None 2339 2340 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2341 return self.expression( 2342 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2343 ) 2344 2345 def _parse_locking(self) -> exp.LockingProperty: 2346 if self._match(TokenType.TABLE): 2347 kind = "TABLE" 2348 elif self._match(TokenType.VIEW): 2349 kind = "VIEW" 2350 elif self._match(TokenType.ROW): 2351 kind = "ROW" 2352 elif self._match_text_seq("DATABASE"): 2353 kind = "DATABASE" 2354 else: 2355 kind = None 2356 2357 if kind in ("DATABASE", "TABLE", "VIEW"): 2358 this = self._parse_table_parts() 2359 else: 2360 this = None 2361 2362 if self._match(TokenType.FOR): 2363 for_or_in = "FOR" 2364 elif self._match(TokenType.IN): 2365 for_or_in = "IN" 2366 else: 2367 for_or_in = None 2368 2369 if self._match_text_seq("ACCESS"): 2370 lock_type = "ACCESS" 2371 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2372 lock_type = "EXCLUSIVE" 2373 elif self._match_text_seq("SHARE"): 2374 lock_type = "SHARE" 2375 elif self._match_text_seq("READ"): 2376 lock_type = "READ" 2377 elif self._match_text_seq("WRITE"): 2378 lock_type = "WRITE" 2379 elif self._match_text_seq("CHECKSUM"): 2380 lock_type = "CHECKSUM" 2381 else: 2382 lock_type = None 2383 2384 override = self._match_text_seq("OVERRIDE") 2385 2386 return self.expression( 2387 exp.LockingProperty, 2388 this=this, 2389 kind=kind, 2390 for_or_in=for_or_in, 2391 lock_type=lock_type, 2392 override=override, 2393 ) 2394 2395 def _parse_partition_by(self) -> t.List[exp.Expression]: 2396 if self._match(TokenType.PARTITION_BY): 2397 return self._parse_csv(self._parse_assignment) 2398 return [] 2399 2400 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2401 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2402 if self._match_text_seq("MINVALUE"): 2403 return exp.var("MINVALUE") 2404 if self._match_text_seq("MAXVALUE"): 2405 return exp.var("MAXVALUE") 2406 return self._parse_bitwise() 2407 2408 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2409 expression = None 2410 from_expressions = None 2411 to_expressions = None 2412 2413 if self._match(TokenType.IN): 2414 this = self._parse_wrapped_csv(self._parse_bitwise) 2415 elif self._match(TokenType.FROM): 2416 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2417 self._match_text_seq("TO") 2418 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2419 elif self._match_text_seq("WITH", "(", "MODULUS"): 2420 this = self._parse_number() 2421 self._match_text_seq(",", "REMAINDER") 2422 expression = self._parse_number() 2423 self._match_r_paren() 2424 else: 2425 self.raise_error("Failed to parse partition bound spec.") 2426 2427 return self.expression( 2428 exp.PartitionBoundSpec, 2429 this=this, 2430 expression=expression, 2431 
    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )
    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
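    # Illustrative sketch (not part of the original source): _parse_multitable_inserts covers
    # Oracle-style INSERT ALL / INSERT FIRST statements, collecting one ConditionalInsert per
    # INTO clause:
    #
    #     >>> import sqlglot
    #     >>> sql = "INSERT ALL INTO t1 (c) VALUES (x) INTO t2 (c) VALUES (x) SELECT x FROM src"
    #     >>> tree = sqlglot.parse_one(sql, read="oracle")
    #     >>> len(tree.expressions)
    #     2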
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
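    # Illustrative sketch (not part of the original source): _parse_on_conflict recognizes both
    # Postgres ON CONFLICT and MySQL ON DUPLICATE KEY clauses, using CONFLICT_ACTIONS to parse
    # what follows:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO NOTHING"
    #     >>> insert = sqlglot.parse_one(sql, read="postgres")
    #     >>> isinstance(insert.args.get("conflict"), exp.OnConflict)
    #     True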
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
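    # Illustrative sketch (not part of the original source): the multiple-table DELETE syntax
    # mentioned above stores the leading table list in the "tables" arg:
    #
    #     >>> import sqlglot
    #     >>> sql = "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id"
    #     >>> delete = sqlglot.parse_one(sql, read="mysql")
    #     >>> [t.name for t in delete.args["tables"]]
    #     ['t1']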
    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )
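    # Illustrative sketch (not part of the original source): _parse_with/_parse_cte attach an
    # exp.With node to the statement that follows it:
    #
    #     >>> import sqlglot
    #     >>> select = sqlglot.parse_one("WITH t AS (SELECT 1 AS c) SELECT c FROM t")
    #     >>> [cte.alias for cte in select.args["with"].expressions]
    #     ['t']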
not columns: 3070 return None 3071 3072 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3073 3074 # We bubble up comments from the Identifier to the TableAlias 3075 if isinstance(alias, exp.Identifier): 3076 table_alias.add_comments(alias.pop_comments()) 3077 3078 return table_alias 3079 3080 def _parse_subquery( 3081 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3082 ) -> t.Optional[exp.Subquery]: 3083 if not this: 3084 return None 3085 3086 return self.expression( 3087 exp.Subquery, 3088 this=this, 3089 pivots=self._parse_pivots(), 3090 alias=self._parse_table_alias() if parse_alias else None, 3091 sample=self._parse_table_sample(), 3092 ) 3093 3094 def _implicit_unnests_to_explicit(self, this: E) -> E: 3095 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3096 3097 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3098 for i, join in enumerate(this.args.get("joins") or []): 3099 table = join.this 3100 normalized_table = table.copy() 3101 normalized_table.meta["maybe_column"] = True 3102 normalized_table = _norm(normalized_table, dialect=self.dialect) 3103 3104 if isinstance(table, exp.Table) and not join.args.get("on"): 3105 if normalized_table.parts[0].name in refs: 3106 table_as_column = table.to_column() 3107 unnest = exp.Unnest(expressions=[table_as_column]) 3108 3109 # Table.to_column creates a parent Alias node that we want to convert to 3110 # a TableAlias and attach to the Unnest, so it matches the parser's output 3111 if isinstance(table.args.get("alias"), exp.TableAlias): 3112 table_as_column.replace(table_as_column.this) 3113 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3114 3115 table.replace(unnest) 3116 3117 refs.add(normalized_table.alias_or_name) 3118 3119 return this 3120 3121 def _parse_query_modifiers( 3122 self, this: t.Optional[exp.Expression] 3123 ) -> t.Optional[exp.Expression]: 3124 if isinstance(this, (exp.Query, exp.Table)): 3125 for join in self._parse_joins(): 3126 this.append("joins", join) 3127 for lateral in iter(self._parse_lateral, None): 3128 this.append("laterals", lateral) 3129 3130 while True: 3131 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3132 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3133 key, expression = parser(self) 3134 3135 if expression: 3136 this.set(key, expression) 3137 if key == "limit": 3138 offset = expression.args.pop("offset", None) 3139 3140 if offset: 3141 offset = exp.Offset(expression=offset) 3142 this.set("offset", offset) 3143 3144 limit_by_expressions = expression.expressions 3145 expression.set("expressions", None) 3146 offset.set("expressions", limit_by_expressions) 3147 continue 3148 break 3149 3150 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3151 this = self._implicit_unnests_to_explicit(this) 3152 3153 return this 3154 3155 def _parse_hint(self) -> t.Optional[exp.Hint]: 3156 if self._match(TokenType.HINT): 3157 hints = [] 3158 for hint in iter( 3159 lambda: self._parse_csv( 3160 lambda: self._parse_function() or self._parse_var(upper=True) 3161 ), 3162 [], 3163 ): 3164 hints.extend(hint) 3165 3166 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3167 self.raise_error("Expected */ after HINT") 3168 3169 return self.expression(exp.Hint, expressions=hints) 3170 3171 return None 3172 3173 def _parse_into(self) -> t.Optional[exp.Into]: 3174 if not self._match(TokenType.INTO): 3175 return None 3176 3177 temp = 
self._match(TokenType.TEMPORARY) 3178 unlogged = self._match_text_seq("UNLOGGED") 3179 self._match(TokenType.TABLE) 3180 3181 return self.expression( 3182 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3183 ) 3184 3185 def _parse_from( 3186 self, joins: bool = False, skip_from_token: bool = False 3187 ) -> t.Optional[exp.From]: 3188 if not skip_from_token and not self._match(TokenType.FROM): 3189 return None 3190 3191 return self.expression( 3192 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3193 ) 3194 3195 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3196 return self.expression( 3197 exp.MatchRecognizeMeasure, 3198 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3199 this=self._parse_expression(), 3200 ) 3201 3202 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3203 if not self._match(TokenType.MATCH_RECOGNIZE): 3204 return None 3205 3206 self._match_l_paren() 3207 3208 partition = self._parse_partition_by() 3209 order = self._parse_order() 3210 3211 measures = ( 3212 self._parse_csv(self._parse_match_recognize_measure) 3213 if self._match_text_seq("MEASURES") 3214 else None 3215 ) 3216 3217 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3218 rows = exp.var("ONE ROW PER MATCH") 3219 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3220 text = "ALL ROWS PER MATCH" 3221 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3222 text += " SHOW EMPTY MATCHES" 3223 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3224 text += " OMIT EMPTY MATCHES" 3225 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3226 text += " WITH UNMATCHED ROWS" 3227 rows = exp.var(text) 3228 else: 3229 rows = None 3230 3231 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3232 text = "AFTER MATCH SKIP" 3233 if self._match_text_seq("PAST", "LAST", "ROW"): 3234 text += " PAST LAST ROW" 3235 elif self._match_text_seq("TO", "NEXT", "ROW"): 3236 text += " TO NEXT ROW" 3237 elif self._match_text_seq("TO", "FIRST"): 3238 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3239 elif self._match_text_seq("TO", "LAST"): 3240 text += f" TO LAST {self._advance_any().text}" # type: ignore 3241 after = exp.var(text) 3242 else: 3243 after = None 3244 3245 if self._match_text_seq("PATTERN"): 3246 self._match_l_paren() 3247 3248 if not self._curr: 3249 self.raise_error("Expecting )", self._curr) 3250 3251 paren = 1 3252 start = self._curr 3253 3254 while self._curr and paren > 0: 3255 if self._curr.token_type == TokenType.L_PAREN: 3256 paren += 1 3257 if self._curr.token_type == TokenType.R_PAREN: 3258 paren -= 1 3259 3260 end = self._prev 3261 self._advance() 3262 3263 if paren > 0: 3264 self.raise_error("Expecting )", self._curr) 3265 3266 pattern = exp.var(self._find_sql(start, end)) 3267 else: 3268 pattern = None 3269 3270 define = ( 3271 self._parse_csv(self._parse_name_as_expression) 3272 if self._match_text_seq("DEFINE") 3273 else None 3274 ) 3275 3276 self._match_r_paren() 3277 3278 return self.expression( 3279 exp.MatchRecognize, 3280 partition_by=partition, 3281 order=order, 3282 measures=measures, 3283 rows=rows, 3284 after=after, 3285 pattern=pattern, 3286 define=define, 3287 alias=self._parse_table_alias(), 3288 ) 3289 3290 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3291 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3292 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3293 cross_apply 
= False 3294 3295 if cross_apply is not None: 3296 this = self._parse_select(table=True) 3297 view = None 3298 outer = None 3299 elif self._match(TokenType.LATERAL): 3300 this = self._parse_select(table=True) 3301 view = self._match(TokenType.VIEW) 3302 outer = self._match(TokenType.OUTER) 3303 else: 3304 return None 3305 3306 if not this: 3307 this = ( 3308 self._parse_unnest() 3309 or self._parse_function() 3310 or self._parse_id_var(any_token=False) 3311 ) 3312 3313 while self._match(TokenType.DOT): 3314 this = exp.Dot( 3315 this=this, 3316 expression=self._parse_function() or self._parse_id_var(any_token=False), 3317 ) 3318 3319 if view: 3320 table = self._parse_id_var(any_token=False) 3321 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3322 table_alias: t.Optional[exp.TableAlias] = self.expression( 3323 exp.TableAlias, this=table, columns=columns 3324 ) 3325 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3326 # We move the alias from the lateral's child node to the lateral itself 3327 table_alias = this.args["alias"].pop() 3328 else: 3329 table_alias = self._parse_table_alias() 3330 3331 return self.expression( 3332 exp.Lateral, 3333 this=this, 3334 view=view, 3335 outer=outer, 3336 alias=table_alias, 3337 cross_apply=cross_apply, 3338 ) 3339 3340 def _parse_join_parts( 3341 self, 3342 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3343 return ( 3344 self._match_set(self.JOIN_METHODS) and self._prev, 3345 self._match_set(self.JOIN_SIDES) and self._prev, 3346 self._match_set(self.JOIN_KINDS) and self._prev, 3347 ) 3348 3349 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3350 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3351 this = self._parse_column() 3352 if isinstance(this, exp.Column): 3353 return this.this 3354 return this 3355 3356 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3357 3358 def _parse_join( 3359 self, skip_join_token: bool = False, parse_bracket: bool = False 3360 ) -> t.Optional[exp.Join]: 3361 if self._match(TokenType.COMMA): 3362 return self.expression(exp.Join, this=self._parse_table()) 3363 3364 index = self._index 3365 method, side, kind = self._parse_join_parts() 3366 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3367 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3368 3369 if not skip_join_token and not join: 3370 self._retreat(index) 3371 kind = None 3372 method = None 3373 side = None 3374 3375 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3376 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3377 3378 if not skip_join_token and not join and not outer_apply and not cross_apply: 3379 return None 3380 3381 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3382 3383 if method: 3384 kwargs["method"] = method.text 3385 if side: 3386 kwargs["side"] = side.text 3387 if kind: 3388 kwargs["kind"] = kind.text 3389 if hint: 3390 kwargs["hint"] = hint 3391 3392 if self._match(TokenType.MATCH_CONDITION): 3393 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3394 3395 if self._match(TokenType.ON): 3396 kwargs["on"] = self._parse_assignment() 3397 elif self._match(TokenType.USING): 3398 kwargs["using"] = self._parse_using_identifiers() 3399 elif ( 3400 not (outer_apply or cross_apply) 3401 and not isinstance(kwargs["this"], exp.Unnest) 3402 and not (kind 
and kind.token_type == TokenType.CROSS) 3403 ): 3404 index = self._index 3405 joins: t.Optional[list] = list(self._parse_joins()) 3406 3407 if joins and self._match(TokenType.ON): 3408 kwargs["on"] = self._parse_assignment() 3409 elif joins and self._match(TokenType.USING): 3410 kwargs["using"] = self._parse_using_identifiers() 3411 else: 3412 joins = None 3413 self._retreat(index) 3414 3415 kwargs["this"].set("joins", joins if joins else None) 3416 3417 comments = [c for token in (method, side, kind) if token for c in token.comments] 3418 return self.expression(exp.Join, comments=comments, **kwargs) 3419 3420 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3421 this = self._parse_assignment() 3422 3423 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3424 return this 3425 3426 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3427 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3428 3429 return this 3430 3431 def _parse_index_params(self) -> exp.IndexParameters: 3432 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3433 3434 if self._match(TokenType.L_PAREN, advance=False): 3435 columns = self._parse_wrapped_csv(self._parse_with_operator) 3436 else: 3437 columns = None 3438 3439 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3440 partition_by = self._parse_partition_by() 3441 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3442 tablespace = ( 3443 self._parse_var(any_token=True) 3444 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3445 else None 3446 ) 3447 where = self._parse_where() 3448 3449 on = self._parse_field() if self._match(TokenType.ON) else None 3450 3451 return self.expression( 3452 exp.IndexParameters, 3453 using=using, 3454 columns=columns, 3455 include=include, 3456 partition_by=partition_by, 3457 where=where, 3458 with_storage=with_storage, 3459 tablespace=tablespace, 3460 on=on, 3461 ) 3462 3463 def _parse_index( 3464 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3465 ) -> t.Optional[exp.Index]: 3466 if index or anonymous: 3467 unique = None 3468 primary = None 3469 amp = None 3470 3471 self._match(TokenType.ON) 3472 self._match(TokenType.TABLE) # hive 3473 table = self._parse_table_parts(schema=True) 3474 else: 3475 unique = self._match(TokenType.UNIQUE) 3476 primary = self._match_text_seq("PRIMARY") 3477 amp = self._match_text_seq("AMP") 3478 3479 if not self._match(TokenType.INDEX): 3480 return None 3481 3482 index = self._parse_id_var() 3483 table = None 3484 3485 params = self._parse_index_params() 3486 3487 return self.expression( 3488 exp.Index, 3489 this=index, 3490 table=table, 3491 unique=unique, 3492 primary=primary, 3493 amp=amp, 3494 params=params, 3495 ) 3496 3497 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3498 hints: t.List[exp.Expression] = [] 3499 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3500 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3501 hints.append( 3502 self.expression( 3503 exp.WithTableHint, 3504 expressions=self._parse_csv( 3505 lambda: self._parse_function() or self._parse_var(any_token=True) 3506 ), 3507 ) 3508 ) 3509 self._match_r_paren() 3510 else: 3511 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3512 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3513 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3514 
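# Illustrative note (assumption, not in the original source): if USE/FORCE/IGNORE are the
# matched TABLE_INDEX_HINT_TOKENS, an input such as `SELECT * FROM t USE INDEX FOR ORDER BY (i_date)`
# would yield exp.IndexTableHint(this="USE", target="ORDER BY", expressions=[i_date]) via the
# _match_set / _match(FOR) / _parse_wrapped_id_vars steps below.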
3515 self._match_set((TokenType.INDEX, TokenType.KEY)) 3516 if self._match(TokenType.FOR): 3517 hint.set("target", self._advance_any() and self._prev.text.upper()) 3518 3519 hint.set("expressions", self._parse_wrapped_id_vars()) 3520 hints.append(hint) 3521 3522 return hints or None 3523 3524 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3525 return ( 3526 (not schema and self._parse_function(optional_parens=False)) 3527 or self._parse_id_var(any_token=False) 3528 or self._parse_string_as_identifier() 3529 or self._parse_placeholder() 3530 ) 3531 3532 def _parse_table_parts( 3533 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3534 ) -> exp.Table: 3535 catalog = None 3536 db = None 3537 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3538 3539 while self._match(TokenType.DOT): 3540 if catalog: 3541 # This allows nesting the table in arbitrarily many dot expressions if needed 3542 table = self.expression( 3543 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3544 ) 3545 else: 3546 catalog = db 3547 db = table 3548 # "" used for tsql FROM a..b case 3549 table = self._parse_table_part(schema=schema) or "" 3550 3551 if ( 3552 wildcard 3553 and self._is_connected() 3554 and (isinstance(table, exp.Identifier) or not table) 3555 and self._match(TokenType.STAR) 3556 ): 3557 if isinstance(table, exp.Identifier): 3558 table.args["this"] += "*" 3559 else: 3560 table = exp.Identifier(this="*") 3561 3562 # We bubble up comments from the Identifier to the Table 3563 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3564 3565 if is_db_reference: 3566 catalog = db 3567 db = table 3568 table = None 3569 3570 if not table and not is_db_reference: 3571 self.raise_error(f"Expected table name but got {self._curr}") 3572 if not db and is_db_reference: 3573 self.raise_error(f"Expected database name but got {self._curr}") 3574 3575 table = self.expression( 3576 exp.Table, 3577 comments=comments, 3578 this=table, 3579 db=db, 3580 catalog=catalog, 3581 ) 3582 3583 changes = self._parse_changes() 3584 if changes: 3585 table.set("changes", changes) 3586 3587 at_before = self._parse_historical_data() 3588 if at_before: 3589 table.set("when", at_before) 3590 3591 pivots = self._parse_pivots() 3592 if pivots: 3593 table.set("pivots", pivots) 3594 3595 return table 3596 3597 def _parse_table( 3598 self, 3599 schema: bool = False, 3600 joins: bool = False, 3601 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3602 parse_bracket: bool = False, 3603 is_db_reference: bool = False, 3604 parse_partition: bool = False, 3605 ) -> t.Optional[exp.Expression]: 3606 lateral = self._parse_lateral() 3607 if lateral: 3608 return lateral 3609 3610 unnest = self._parse_unnest() 3611 if unnest: 3612 return unnest 3613 3614 values = self._parse_derived_table_values() 3615 if values: 3616 return values 3617 3618 subquery = self._parse_select(table=True) 3619 if subquery: 3620 if not subquery.args.get("pivots"): 3621 subquery.set("pivots", self._parse_pivots()) 3622 return subquery 3623 3624 bracket = parse_bracket and self._parse_bracket(None) 3625 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3626 3627 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3628 self._parse_table 3629 ) 3630 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3631 3632 only = self._match(TokenType.ONLY) 3633 3634 this = 
t.cast( 3635 exp.Expression, 3636 bracket 3637 or rows_from 3638 or self._parse_bracket( 3639 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3640 ), 3641 ) 3642 3643 if only: 3644 this.set("only", only) 3645 3646 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3647 self._match_text_seq("*") 3648 3649 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3650 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3651 this.set("partition", self._parse_partition()) 3652 3653 if schema: 3654 return self._parse_schema(this=this) 3655 3656 version = self._parse_version() 3657 3658 if version: 3659 this.set("version", version) 3660 3661 if self.dialect.ALIAS_POST_TABLESAMPLE: 3662 this.set("sample", self._parse_table_sample()) 3663 3664 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3665 if alias: 3666 this.set("alias", alias) 3667 3668 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3669 return self.expression( 3670 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3671 ) 3672 3673 this.set("hints", self._parse_table_hints()) 3674 3675 if not this.args.get("pivots"): 3676 this.set("pivots", self._parse_pivots()) 3677 3678 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3679 this.set("sample", self._parse_table_sample()) 3680 3681 if joins: 3682 for join in self._parse_joins(): 3683 this.append("joins", join) 3684 3685 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3686 this.set("ordinality", True) 3687 this.set("alias", self._parse_table_alias()) 3688 3689 return this 3690 3691 def _parse_version(self) -> t.Optional[exp.Version]: 3692 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3693 this = "TIMESTAMP" 3694 elif self._match(TokenType.VERSION_SNAPSHOT): 3695 this = "VERSION" 3696 else: 3697 return None 3698 3699 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3700 kind = self._prev.text.upper() 3701 start = self._parse_bitwise() 3702 self._match_texts(("TO", "AND")) 3703 end = self._parse_bitwise() 3704 expression: t.Optional[exp.Expression] = self.expression( 3705 exp.Tuple, expressions=[start, end] 3706 ) 3707 elif self._match_text_seq("CONTAINED", "IN"): 3708 kind = "CONTAINED IN" 3709 expression = self.expression( 3710 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3711 ) 3712 elif self._match(TokenType.ALL): 3713 kind = "ALL" 3714 expression = None 3715 else: 3716 self._match_text_seq("AS", "OF") 3717 kind = "AS OF" 3718 expression = self._parse_type() 3719 3720 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3721 3722 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3723 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3724 index = self._index 3725 historical_data = None 3726 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3727 this = self._prev.text.upper() 3728 kind = ( 3729 self._match(TokenType.L_PAREN) 3730 and self._match_texts(self.HISTORICAL_DATA_KIND) 3731 and self._prev.text.upper() 3732 ) 3733 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3734 3735 if expression: 3736 self._match_r_paren() 3737 historical_data = self.expression( 3738 exp.HistoricalData, this=this, kind=kind, expression=expression 3739 ) 3740 else: 3741 self._retreat(index) 3742 3743 return historical_data 3744 3745 def _parse_changes(self) -> t.Optional[exp.Changes]: 3746 if not 
self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3747 return None 3748 3749 information = self._parse_var(any_token=True) 3750 self._match_r_paren() 3751 3752 return self.expression( 3753 exp.Changes, 3754 information=information, 3755 at_before=self._parse_historical_data(), 3756 end=self._parse_historical_data(), 3757 ) 3758 3759 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3760 if not self._match(TokenType.UNNEST): 3761 return None 3762 3763 expressions = self._parse_wrapped_csv(self._parse_equality) 3764 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3765 3766 alias = self._parse_table_alias() if with_alias else None 3767 3768 if alias: 3769 if self.dialect.UNNEST_COLUMN_ONLY: 3770 if alias.args.get("columns"): 3771 self.raise_error("Unexpected extra column alias in unnest.") 3772 3773 alias.set("columns", [alias.this]) 3774 alias.set("this", None) 3775 3776 columns = alias.args.get("columns") or [] 3777 if offset and len(expressions) < len(columns): 3778 offset = columns.pop() 3779 3780 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3781 self._match(TokenType.ALIAS) 3782 offset = self._parse_id_var( 3783 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3784 ) or exp.to_identifier("offset") 3785 3786 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3787 3788 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3789 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3790 if not is_derived and not ( 3791 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3792 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3793 ): 3794 return None 3795 3796 expressions = self._parse_csv(self._parse_value) 3797 alias = self._parse_table_alias() 3798 3799 if is_derived: 3800 self._match_r_paren() 3801 3802 return self.expression( 3803 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3804 ) 3805 3806 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3807 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3808 as_modifier and self._match_text_seq("USING", "SAMPLE") 3809 ): 3810 return None 3811 3812 bucket_numerator = None 3813 bucket_denominator = None 3814 bucket_field = None 3815 percent = None 3816 size = None 3817 seed = None 3818 3819 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3820 matched_l_paren = self._match(TokenType.L_PAREN) 3821 3822 if self.TABLESAMPLE_CSV: 3823 num = None 3824 expressions = self._parse_csv(self._parse_primary) 3825 else: 3826 expressions = None 3827 num = ( 3828 self._parse_factor() 3829 if self._match(TokenType.NUMBER, advance=False) 3830 else self._parse_primary() or self._parse_placeholder() 3831 ) 3832 3833 if self._match_text_seq("BUCKET"): 3834 bucket_numerator = self._parse_number() 3835 self._match_text_seq("OUT", "OF") 3836 bucket_denominator = bucket_denominator = self._parse_number() 3837 self._match(TokenType.ON) 3838 bucket_field = self._parse_field() 3839 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3840 percent = num 3841 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3842 size = num 3843 else: 3844 percent = num 3845 3846 if matched_l_paren: 3847 self._match_r_paren() 3848 3849 if self._match(TokenType.L_PAREN): 3850 method = self._parse_var(upper=True) 3851 seed = self._match(TokenType.COMMA) and self._parse_number() 3852 
self._match_r_paren() 3853 elif self._match_texts(("SEED", "REPEATABLE")): 3854 seed = self._parse_wrapped(self._parse_number) 3855 3856 if not method and self.DEFAULT_SAMPLING_METHOD: 3857 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3858 3859 return self.expression( 3860 exp.TableSample, 3861 expressions=expressions, 3862 method=method, 3863 bucket_numerator=bucket_numerator, 3864 bucket_denominator=bucket_denominator, 3865 bucket_field=bucket_field, 3866 percent=percent, 3867 size=size, 3868 seed=seed, 3869 ) 3870 3871 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3872 return list(iter(self._parse_pivot, None)) or None 3873 3874 def _parse_joins(self) -> t.Iterator[exp.Join]: 3875 return iter(self._parse_join, None) 3876 3877 # https://duckdb.org/docs/sql/statements/pivot 3878 def _parse_simplified_pivot(self) -> exp.Pivot: 3879 def _parse_on() -> t.Optional[exp.Expression]: 3880 this = self._parse_bitwise() 3881 return self._parse_in(this) if self._match(TokenType.IN) else this 3882 3883 this = self._parse_table() 3884 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3885 using = self._match(TokenType.USING) and self._parse_csv( 3886 lambda: self._parse_alias(self._parse_function()) 3887 ) 3888 group = self._parse_group() 3889 return self.expression( 3890 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3891 ) 3892 3893 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3894 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3895 this = self._parse_select_or_expression() 3896 3897 self._match(TokenType.ALIAS) 3898 alias = self._parse_bitwise() 3899 if alias: 3900 if isinstance(alias, exp.Column) and not alias.db: 3901 alias = alias.this 3902 return self.expression(exp.PivotAlias, this=this, alias=alias) 3903 3904 return this 3905 3906 value = self._parse_column() 3907 3908 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3909 self.raise_error("Expecting IN (") 3910 3911 if self._match(TokenType.ANY): 3912 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3913 else: 3914 exprs = self._parse_csv(_parse_aliased_expression) 3915 3916 self._match_r_paren() 3917 return self.expression(exp.In, this=value, expressions=exprs) 3918 3919 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3920 index = self._index 3921 include_nulls = None 3922 3923 if self._match(TokenType.PIVOT): 3924 unpivot = False 3925 elif self._match(TokenType.UNPIVOT): 3926 unpivot = True 3927 3928 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3929 if self._match_text_seq("INCLUDE", "NULLS"): 3930 include_nulls = True 3931 elif self._match_text_seq("EXCLUDE", "NULLS"): 3932 include_nulls = False 3933 else: 3934 return None 3935 3936 expressions = [] 3937 3938 if not self._match(TokenType.L_PAREN): 3939 self._retreat(index) 3940 return None 3941 3942 if unpivot: 3943 expressions = self._parse_csv(self._parse_column) 3944 else: 3945 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3946 3947 if not expressions: 3948 self.raise_error("Failed to parse PIVOT's aggregation list") 3949 3950 if not self._match(TokenType.FOR): 3951 self.raise_error("Expecting FOR") 3952 3953 field = self._parse_pivot_in() 3954 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3955 self._parse_bitwise 3956 ) 3957 3958 self._match_r_paren() 3959 3960 pivot = self.expression( 3961 exp.Pivot, 3962 expressions=expressions, 
3963 field=field, 3964 unpivot=unpivot, 3965 include_nulls=include_nulls, 3966 default_on_null=default_on_null, 3967 ) 3968 3969 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3970 pivot.set("alias", self._parse_table_alias()) 3971 3972 if not unpivot: 3973 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3974 3975 columns: t.List[exp.Expression] = [] 3976 for fld in pivot.args["field"].expressions: 3977 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3978 for name in names: 3979 if self.PREFIXED_PIVOT_COLUMNS: 3980 name = f"{name}_{field_name}" if name else field_name 3981 else: 3982 name = f"{field_name}_{name}" if name else field_name 3983 3984 columns.append(exp.to_identifier(name)) 3985 3986 pivot.set("columns", columns) 3987 3988 return pivot 3989 3990 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3991 return [agg.alias for agg in aggregations] 3992 3993 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3994 if not skip_where_token and not self._match(TokenType.PREWHERE): 3995 return None 3996 3997 return self.expression( 3998 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3999 ) 4000 4001 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4002 if not skip_where_token and not self._match(TokenType.WHERE): 4003 return None 4004 4005 return self.expression( 4006 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4007 ) 4008 4009 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4010 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4011 return None 4012 4013 elements: t.Dict[str, t.Any] = defaultdict(list) 4014 4015 if self._match(TokenType.ALL): 4016 elements["all"] = True 4017 elif self._match(TokenType.DISTINCT): 4018 elements["all"] = False 4019 4020 while True: 4021 index = self._index 4022 4023 elements["expressions"].extend( 4024 self._parse_csv( 4025 lambda: None 4026 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4027 else self._parse_assignment() 4028 ) 4029 ) 4030 4031 before_with_index = self._index 4032 with_prefix = self._match(TokenType.WITH) 4033 4034 if self._match(TokenType.ROLLUP): 4035 elements["rollup"].append( 4036 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4037 ) 4038 elif self._match(TokenType.CUBE): 4039 elements["cube"].append( 4040 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4041 ) 4042 elif self._match(TokenType.GROUPING_SETS): 4043 elements["grouping_sets"].append( 4044 self.expression( 4045 exp.GroupingSets, 4046 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4047 ) 4048 ) 4049 elif self._match_text_seq("TOTALS"): 4050 elements["totals"] = True # type: ignore 4051 4052 if before_with_index <= self._index <= before_with_index + 1: 4053 self._retreat(before_with_index) 4054 break 4055 4056 if index == self._index: 4057 break 4058 4059 return self.expression(exp.Group, **elements) # type: ignore 4060 4061 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4062 return self.expression( 4063 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4064 ) 4065 4066 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4067 if self._match(TokenType.L_PAREN): 4068 grouping_set = self._parse_csv(self._parse_column) 4069 
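# Illustrative example (not from the source): in GROUP BY GROUPING SETS ((a, b), c),
# this L_PAREN branch wraps (a, b) into an exp.Tuple below, while the bare column c
# is handled by the _parse_column() fallback.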
self._match_r_paren() 4070 return self.expression(exp.Tuple, expressions=grouping_set) 4071 4072 return self._parse_column() 4073 4074 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4075 if not skip_having_token and not self._match(TokenType.HAVING): 4076 return None 4077 return self.expression(exp.Having, this=self._parse_assignment()) 4078 4079 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4080 if not self._match(TokenType.QUALIFY): 4081 return None 4082 return self.expression(exp.Qualify, this=self._parse_assignment()) 4083 4084 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4085 if skip_start_token: 4086 start = None 4087 elif self._match(TokenType.START_WITH): 4088 start = self._parse_assignment() 4089 else: 4090 return None 4091 4092 self._match(TokenType.CONNECT_BY) 4093 nocycle = self._match_text_seq("NOCYCLE") 4094 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4095 exp.Prior, this=self._parse_bitwise() 4096 ) 4097 connect = self._parse_assignment() 4098 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4099 4100 if not start and self._match(TokenType.START_WITH): 4101 start = self._parse_assignment() 4102 4103 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4104 4105 def _parse_name_as_expression(self) -> exp.Alias: 4106 return self.expression( 4107 exp.Alias, 4108 alias=self._parse_id_var(any_token=True), 4109 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4110 ) 4111 4112 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4113 if self._match_text_seq("INTERPOLATE"): 4114 return self._parse_wrapped_csv(self._parse_name_as_expression) 4115 return None 4116 4117 def _parse_order( 4118 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4119 ) -> t.Optional[exp.Expression]: 4120 siblings = None 4121 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4122 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4123 return this 4124 4125 siblings = True 4126 4127 return self.expression( 4128 exp.Order, 4129 this=this, 4130 expressions=self._parse_csv(self._parse_ordered), 4131 siblings=siblings, 4132 ) 4133 4134 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4135 if not self._match(token): 4136 return None 4137 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4138 4139 def _parse_ordered( 4140 self, parse_method: t.Optional[t.Callable] = None 4141 ) -> t.Optional[exp.Ordered]: 4142 this = parse_method() if parse_method else self._parse_assignment() 4143 if not this: 4144 return None 4145 4146 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4147 this = exp.var("ALL") 4148 4149 asc = self._match(TokenType.ASC) 4150 desc = self._match(TokenType.DESC) or (asc and False) 4151 4152 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4153 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4154 4155 nulls_first = is_nulls_first or False 4156 explicitly_null_ordered = is_nulls_first or is_nulls_last 4157 4158 if ( 4159 not explicitly_null_ordered 4160 and ( 4161 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4162 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4163 ) 4164 and self.dialect.NULL_ORDERING != "nulls_are_last" 4165 ): 4166 nulls_first = True 4167 4168 if self._match_text_seq("WITH", "FILL"): 4169 with_fill = self.expression( 4170 exp.WithFill, 4171 **{ # 
type: ignore 4172 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4173 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4174 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4175 "interpolate": self._parse_interpolate(), 4176 }, 4177 ) 4178 else: 4179 with_fill = None 4180 4181 return self.expression( 4182 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4183 ) 4184 4185 def _parse_limit( 4186 self, 4187 this: t.Optional[exp.Expression] = None, 4188 top: bool = False, 4189 skip_limit_token: bool = False, 4190 ) -> t.Optional[exp.Expression]: 4191 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4192 comments = self._prev_comments 4193 if top: 4194 limit_paren = self._match(TokenType.L_PAREN) 4195 expression = self._parse_term() if limit_paren else self._parse_number() 4196 4197 if limit_paren: 4198 self._match_r_paren() 4199 else: 4200 expression = self._parse_term() 4201 4202 if self._match(TokenType.COMMA): 4203 offset = expression 4204 expression = self._parse_term() 4205 else: 4206 offset = None 4207 4208 limit_exp = self.expression( 4209 exp.Limit, 4210 this=this, 4211 expression=expression, 4212 offset=offset, 4213 comments=comments, 4214 expressions=self._parse_limit_by(), 4215 ) 4216 4217 return limit_exp 4218 4219 if self._match(TokenType.FETCH): 4220 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4221 direction = self._prev.text.upper() if direction else "FIRST" 4222 4223 count = self._parse_field(tokens=self.FETCH_TOKENS) 4224 percent = self._match(TokenType.PERCENT) 4225 4226 self._match_set((TokenType.ROW, TokenType.ROWS)) 4227 4228 only = self._match_text_seq("ONLY") 4229 with_ties = self._match_text_seq("WITH", "TIES") 4230 4231 if only and with_ties: 4232 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4233 4234 return self.expression( 4235 exp.Fetch, 4236 direction=direction, 4237 count=count, 4238 percent=percent, 4239 with_ties=with_ties, 4240 ) 4241 4242 return this 4243 4244 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4245 if not self._match(TokenType.OFFSET): 4246 return this 4247 4248 count = self._parse_term() 4249 self._match_set((TokenType.ROW, TokenType.ROWS)) 4250 4251 return self.expression( 4252 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4253 ) 4254 4255 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4256 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4257 4258 def _parse_locks(self) -> t.List[exp.Lock]: 4259 locks = [] 4260 while True: 4261 if self._match_text_seq("FOR", "UPDATE"): 4262 update = True 4263 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4264 "LOCK", "IN", "SHARE", "MODE" 4265 ): 4266 update = False 4267 else: 4268 break 4269 4270 expressions = None 4271 if self._match_text_seq("OF"): 4272 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4273 4274 wait: t.Optional[bool | exp.Expression] = None 4275 if self._match_text_seq("NOWAIT"): 4276 wait = True 4277 elif self._match_text_seq("WAIT"): 4278 wait = self._parse_primary() 4279 elif self._match_text_seq("SKIP", "LOCKED"): 4280 wait = False 4281 4282 locks.append( 4283 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4284 ) 4285 4286 return locks 4287 4288 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4289 
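# Sketch (illustrative, not part of the original source): the loop below folds each
# matched set operator into a new node left-deep, so roughly:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a FROM x UNION SELECT b FROM y EXCEPT SELECT c FROM z")
#   Except(this=Union(this=Select(...), expression=Select(...)), expression=Select(...))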
while this and self._match_set(self.SET_OPERATIONS): 4290 token_type = self._prev.token_type 4291 4292 if token_type == TokenType.UNION: 4293 operation: t.Type[exp.SetOperation] = exp.Union 4294 elif token_type == TokenType.EXCEPT: 4295 operation = exp.Except 4296 else: 4297 operation = exp.Intersect 4298 4299 comments = self._prev.comments 4300 4301 if self._match(TokenType.DISTINCT): 4302 distinct: t.Optional[bool] = True 4303 elif self._match(TokenType.ALL): 4304 distinct = False 4305 else: 4306 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4307 if distinct is None: 4308 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4309 4310 by_name = self._match_text_seq("BY", "NAME") 4311 expression = self._parse_select(nested=True, parse_set_operation=False) 4312 4313 this = self.expression( 4314 operation, 4315 comments=comments, 4316 this=this, 4317 distinct=distinct, 4318 by_name=by_name, 4319 expression=expression, 4320 ) 4321 4322 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4323 expression = this.expression 4324 4325 if expression: 4326 for arg in self.SET_OP_MODIFIERS: 4327 expr = expression.args.get(arg) 4328 if expr: 4329 this.set(arg, expr.pop()) 4330 4331 return this 4332 4333 def _parse_expression(self) -> t.Optional[exp.Expression]: 4334 return self._parse_alias(self._parse_assignment()) 4335 4336 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4337 this = self._parse_disjunction() 4338 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4339 # This allows us to parse <non-identifier token> := <expr> 4340 this = exp.column( 4341 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4342 ) 4343 4344 while self._match_set(self.ASSIGNMENT): 4345 this = self.expression( 4346 self.ASSIGNMENT[self._prev.token_type], 4347 this=this, 4348 comments=self._prev_comments, 4349 expression=self._parse_assignment(), 4350 ) 4351 4352 return this 4353 4354 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4355 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4356 4357 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4358 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4359 4360 def _parse_equality(self) -> t.Optional[exp.Expression]: 4361 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4362 4363 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4364 return self._parse_tokens(self._parse_range, self.COMPARISON) 4365 4366 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4367 this = this or self._parse_bitwise() 4368 negate = self._match(TokenType.NOT) 4369 4370 if self._match_set(self.RANGE_PARSERS): 4371 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4372 if not expression: 4373 return this 4374 4375 this = expression 4376 elif self._match(TokenType.ISNULL): 4377 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4378 4379 # Postgres supports ISNULL and NOTNULL for conditions. 
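# e.g. `x ISNULL` is normalized to `x IS NULL` above, and `x NOTNULL` just below
# becomes `NOT (x IS NULL)`.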
4380 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4381 if self._match(TokenType.NOTNULL): 4382 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4383 this = self.expression(exp.Not, this=this) 4384 4385 if negate: 4386 this = self._negate_range(this) 4387 4388 if self._match(TokenType.IS): 4389 this = self._parse_is(this) 4390 4391 return this 4392 4393 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4394 if not this: 4395 return this 4396 4397 return self.expression(exp.Not, this=this) 4398 4399 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4400 index = self._index - 1 4401 negate = self._match(TokenType.NOT) 4402 4403 if self._match_text_seq("DISTINCT", "FROM"): 4404 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4405 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4406 4407 if self._match(TokenType.JSON): 4408 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4409 4410 if self._match_text_seq("WITH"): 4411 _with = True 4412 elif self._match_text_seq("WITHOUT"): 4413 _with = False 4414 else: 4415 _with = None 4416 4417 unique = self._match(TokenType.UNIQUE) 4418 self._match_text_seq("KEYS") 4419 expression: t.Optional[exp.Expression] = self.expression( 4420 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4421 ) 4422 else: 4423 expression = self._parse_primary() or self._parse_null() 4424 if not expression: 4425 self._retreat(index) 4426 return None 4427 4428 this = self.expression(exp.Is, this=this, expression=expression) 4429 return self.expression(exp.Not, this=this) if negate else this 4430 4431 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4432 unnest = self._parse_unnest(with_alias=False) 4433 if unnest: 4434 this = self.expression(exp.In, this=this, unnest=unnest) 4435 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4436 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4437 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4438 4439 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4440 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4441 else: 4442 this = self.expression(exp.In, this=this, expressions=expressions) 4443 4444 if matched_l_paren: 4445 self._match_r_paren(this) 4446 elif not self._match(TokenType.R_BRACKET, expression=this): 4447 self.raise_error("Expecting ]") 4448 else: 4449 this = self.expression(exp.In, this=this, field=self._parse_field()) 4450 4451 return this 4452 4453 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4454 low = self._parse_bitwise() 4455 self._match(TokenType.AND) 4456 high = self._parse_bitwise() 4457 return self.expression(exp.Between, this=this, low=low, high=high) 4458 4459 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4460 if not self._match(TokenType.ESCAPE): 4461 return this 4462 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4463 4464 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4465 index = self._index 4466 4467 if not self._match(TokenType.INTERVAL) and match_interval: 4468 return None 4469 4470 if self._match(TokenType.STRING, advance=False): 4471 this = self._parse_primary() 4472 else: 4473 this = self._parse_term() 4474 4475 if not this 
or ( 4476 isinstance(this, exp.Column) 4477 and not this.table 4478 and not this.this.quoted 4479 and this.name.upper() == "IS" 4480 ): 4481 self._retreat(index) 4482 return None 4483 4484 unit = self._parse_function() or ( 4485 not self._match(TokenType.ALIAS, advance=False) 4486 and self._parse_var(any_token=True, upper=True) 4487 ) 4488 4489 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4490 # each INTERVAL expression into this canonical form so it's easy to transpile 4491 if this and this.is_number: 4492 this = exp.Literal.string(this.to_py()) 4493 elif this and this.is_string: 4494 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4495 if len(parts) == 1: 4496 if unit: 4497 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4498 self._retreat(self._index - 1) 4499 4500 this = exp.Literal.string(parts[0][0]) 4501 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4502 4503 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4504 unit = self.expression( 4505 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4506 ) 4507 4508 interval = self.expression(exp.Interval, this=this, unit=unit) 4509 4510 index = self._index 4511 self._match(TokenType.PLUS) 4512 4513 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4514 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4515 return self.expression( 4516 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4517 ) 4518 4519 self._retreat(index) 4520 return interval 4521 4522 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4523 this = self._parse_term() 4524 4525 while True: 4526 if self._match_set(self.BITWISE): 4527 this = self.expression( 4528 self.BITWISE[self._prev.token_type], 4529 this=this, 4530 expression=self._parse_term(), 4531 ) 4532 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4533 this = self.expression( 4534 exp.DPipe, 4535 this=this, 4536 expression=self._parse_term(), 4537 safe=not self.dialect.STRICT_STRING_CONCAT, 4538 ) 4539 elif self._match(TokenType.DQMARK): 4540 this = self.expression( 4541 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4542 ) 4543 elif self._match_pair(TokenType.LT, TokenType.LT): 4544 this = self.expression( 4545 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4546 ) 4547 elif self._match_pair(TokenType.GT, TokenType.GT): 4548 this = self.expression( 4549 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4550 ) 4551 else: 4552 break 4553 4554 return this 4555 4556 def _parse_term(self) -> t.Optional[exp.Expression]: 4557 this = self._parse_factor() 4558 4559 while self._match_set(self.TERM): 4560 klass = self.TERM[self._prev.token_type] 4561 comments = self._prev_comments 4562 expression = self._parse_factor() 4563 4564 this = self.expression(klass, this=this, comments=comments, expression=expression) 4565 4566 if isinstance(this, exp.Collate): 4567 expr = this.expression 4568 4569 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4570 # fallback to Identifier / Var 4571 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4572 ident = expr.this 4573 if isinstance(ident, exp.Identifier): 4574 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4575 4576 return this 4577 4578 def _parse_factor(self) -> t.Optional[exp.Expression]: 4579 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4580 this = parse_method() 4581 4582 while self._match_set(self.FACTOR): 4583 klass = self.FACTOR[self._prev.token_type] 4584 comments = self._prev_comments 4585 expression = parse_method() 4586 4587 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4588 self._retreat(self._index - 1) 4589 return this 4590 4591 this = self.expression(klass, this=this, comments=comments, expression=expression) 4592 4593 if isinstance(this, exp.Div): 4594 this.args["typed"] = self.dialect.TYPED_DIVISION 4595 this.args["safe"] = self.dialect.SAFE_DIVISION 4596 4597 return this 4598 4599 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4600 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4601 4602 def _parse_unary(self) -> t.Optional[exp.Expression]: 4603 if self._match_set(self.UNARY_PARSERS): 4604 return self.UNARY_PARSERS[self._prev.token_type](self) 4605 return self._parse_at_time_zone(self._parse_type()) 4606 4607 def _parse_type( 4608 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4609 ) -> t.Optional[exp.Expression]: 4610 interval = parse_interval and self._parse_interval() 4611 if interval: 4612 return interval 4613 4614 index = self._index 4615 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4616 4617 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4618 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4619 if isinstance(data_type, exp.Cast): 4620 # This constructor can contain ops directly after it, for instance struct unnesting: 4621 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4622 return self._parse_column_ops(data_type) 4623 4624 if data_type: 4625 index2 = self._index 4626 this = self._parse_primary() 4627 4628 if isinstance(this, exp.Literal): 4629 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4630 if parser: 4631 return parser(self, this, data_type) 4632 4633 return self.expression(exp.Cast, this=this, to=data_type) 4634 4635 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4636 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4637 # 4638 # If the index difference here is greater than 1, that means the parser itself must have 4639 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4640 # 4641 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4642 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4643 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4644 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4645 # 4646 # In these cases, we don't really want to return the converted type, but instead retreat 4647 # and try to parse a Column or Identifier in the section below.
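# Worked illustration (not from the source): for DECIMAL(38, 0) the tokens
# DECIMAL ( 38 , 0 ) are all consumed, so index2 - index > 1 and the parsed DataType is
# kept; for a bare DECIMAL that a TYPE_CONVERTERS entry expanded to DECIMAL(38, 0), only
# the type keyword was consumed, so we retreat and re-parse the name as a Column/Identifier.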
4648 if data_type.expressions and index2 - index > 1: 4649 self._retreat(index2) 4650 return self._parse_column_ops(data_type) 4651 4652 self._retreat(index) 4653 4654 if fallback_to_identifier: 4655 return self._parse_id_var() 4656 4657 this = self._parse_column() 4658 return this and self._parse_column_ops(this) 4659 4660 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4661 this = self._parse_type() 4662 if not this: 4663 return None 4664 4665 if isinstance(this, exp.Column) and not this.table: 4666 this = exp.var(this.name.upper()) 4667 4668 return self.expression( 4669 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4670 ) 4671 4672 def _parse_types( 4673 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4674 ) -> t.Optional[exp.Expression]: 4675 index = self._index 4676 4677 this: t.Optional[exp.Expression] = None 4678 prefix = self._match_text_seq("SYSUDTLIB", ".") 4679 4680 if not self._match_set(self.TYPE_TOKENS): 4681 identifier = allow_identifiers and self._parse_id_var( 4682 any_token=False, tokens=(TokenType.VAR,) 4683 ) 4684 if isinstance(identifier, exp.Identifier): 4685 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4686 4687 if len(tokens) != 1: 4688 self.raise_error("Unexpected identifier", self._prev) 4689 4690 if tokens[0].token_type in self.TYPE_TOKENS: 4691 self._prev = tokens[0] 4692 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4693 type_name = identifier.name 4694 4695 while self._match(TokenType.DOT): 4696 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4697 4698 this = exp.DataType.build(type_name, udt=True) 4699 else: 4700 self._retreat(self._index - 1) 4701 return None 4702 else: 4703 return None 4704 4705 type_token = self._prev.token_type 4706 4707 if type_token == TokenType.PSEUDO_TYPE: 4708 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4709 4710 if type_token == TokenType.OBJECT_IDENTIFIER: 4711 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4712 4713 # https://materialize.com/docs/sql/types/map/ 4714 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4715 key_type = self._parse_types( 4716 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4717 ) 4718 if not self._match(TokenType.FARROW): 4719 self._retreat(index) 4720 return None 4721 4722 value_type = self._parse_types( 4723 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4724 ) 4725 if not self._match(TokenType.R_BRACKET): 4726 self._retreat(index) 4727 return None 4728 4729 return exp.DataType( 4730 this=exp.DataType.Type.MAP, 4731 expressions=[key_type, value_type], 4732 nested=True, 4733 prefix=prefix, 4734 ) 4735 4736 nested = type_token in self.NESTED_TYPE_TOKENS 4737 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4738 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4739 expressions = None 4740 maybe_func = False 4741 4742 if self._match(TokenType.L_PAREN): 4743 if is_struct: 4744 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4745 elif nested: 4746 expressions = self._parse_csv( 4747 lambda: self._parse_types( 4748 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4749 ) 4750 ) 4751 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4752 this = expressions[0] 4753 this.set("nullable", True) 4754 self._match_r_paren() 4755 return this 4756 elif type_token in self.ENUM_TYPE_TOKENS: 4757 
expressions = self._parse_csv(self._parse_equality) 4758 elif is_aggregate: 4759 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4760 any_token=False, tokens=(TokenType.VAR,) 4761 ) 4762 if not func_or_ident or not self._match(TokenType.COMMA): 4763 return None 4764 expressions = self._parse_csv( 4765 lambda: self._parse_types( 4766 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4767 ) 4768 ) 4769 expressions.insert(0, func_or_ident) 4770 else: 4771 expressions = self._parse_csv(self._parse_type_size) 4772 4773 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4774 if type_token == TokenType.VECTOR and len(expressions) == 2: 4775 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4776 4777 if not expressions or not self._match(TokenType.R_PAREN): 4778 self._retreat(index) 4779 return None 4780 4781 maybe_func = True 4782 4783 values: t.Optional[t.List[exp.Expression]] = None 4784 4785 if nested and self._match(TokenType.LT): 4786 if is_struct: 4787 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4788 else: 4789 expressions = self._parse_csv( 4790 lambda: self._parse_types( 4791 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4792 ) 4793 ) 4794 4795 if not self._match(TokenType.GT): 4796 self.raise_error("Expecting >") 4797 4798 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4799 values = self._parse_csv(self._parse_assignment) 4800 if not values and is_struct: 4801 values = None 4802 self._retreat(self._index - 1) 4803 else: 4804 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4805 4806 if type_token in self.TIMESTAMPS: 4807 if self._match_text_seq("WITH", "TIME", "ZONE"): 4808 maybe_func = False 4809 tz_type = ( 4810 exp.DataType.Type.TIMETZ 4811 if type_token in self.TIMES 4812 else exp.DataType.Type.TIMESTAMPTZ 4813 ) 4814 this = exp.DataType(this=tz_type, expressions=expressions) 4815 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4816 maybe_func = False 4817 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4818 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4819 maybe_func = False 4820 elif type_token == TokenType.INTERVAL: 4821 unit = self._parse_var(upper=True) 4822 if unit: 4823 if self._match_text_seq("TO"): 4824 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4825 4826 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4827 else: 4828 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4829 4830 if maybe_func and check_func: 4831 index2 = self._index 4832 peek = self._parse_string() 4833 4834 if not peek: 4835 self._retreat(index) 4836 return None 4837 4838 self._retreat(index2) 4839 4840 if not this: 4841 if self._match_text_seq("UNSIGNED"): 4842 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4843 if not unsigned_type_token: 4844 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4845 4846 type_token = unsigned_type_token or type_token 4847 4848 this = exp.DataType( 4849 this=exp.DataType.Type[type_token.value], 4850 expressions=expressions, 4851 nested=nested, 4852 prefix=prefix, 4853 ) 4854 4855 # Empty arrays/structs are allowed 4856 if values is not None: 4857 cls = exp.Struct if is_struct else exp.Array 4858 this = exp.cast(cls(expressions=values), this, copy=False) 4859 4860 elif expressions: 4861 this.set("expressions", 
expressions) 4862 4863 # https://materialize.com/docs/sql/types/list/#type-name 4864 while self._match(TokenType.LIST): 4865 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4866 4867 index = self._index 4868 4869 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4870 matched_array = self._match(TokenType.ARRAY) 4871 4872 while self._curr: 4873 datatype_token = self._prev.token_type 4874 matched_l_bracket = self._match(TokenType.L_BRACKET) 4875 if not matched_l_bracket and not matched_array: 4876 break 4877 4878 matched_array = False 4879 values = self._parse_csv(self._parse_assignment) or None 4880 if ( 4881 values 4882 and not schema 4883 and ( 4884 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4885 ) 4886 ): 4887 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4888 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4889 self._retreat(index) 4890 break 4891 4892 this = exp.DataType( 4893 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4894 ) 4895 self._match(TokenType.R_BRACKET) 4896 4897 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4898 converter = self.TYPE_CONVERTERS.get(this.this) 4899 if converter: 4900 this = converter(t.cast(exp.DataType, this)) 4901 4902 return this 4903 4904 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4905 index = self._index 4906 4907 if ( 4908 self._curr 4909 and self._next 4910 and self._curr.token_type in self.TYPE_TOKENS 4911 and self._next.token_type in self.TYPE_TOKENS 4912 ): 4913 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4914 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4915 this = self._parse_id_var() 4916 else: 4917 this = ( 4918 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4919 or self._parse_id_var() 4920 ) 4921 4922 self._match(TokenType.COLON) 4923 4924 if ( 4925 type_required 4926 and not isinstance(this, exp.DataType) 4927 and not self._match_set(self.TYPE_TOKENS, advance=False) 4928 ): 4929 self._retreat(index) 4930 return self._parse_types() 4931 4932 return self._parse_column_def(this) 4933 4934 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4935 if not self._match_text_seq("AT", "TIME", "ZONE"): 4936 return this 4937 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4938 4939 def _parse_column(self) -> t.Optional[exp.Expression]: 4940 this = self._parse_column_reference() 4941 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4942 4943 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4944 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4945 4946 return column 4947 4948 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4949 this = self._parse_field() 4950 if ( 4951 not this 4952 and self._match(TokenType.VALUES, advance=False) 4953 and self.VALUES_FOLLOWED_BY_PAREN 4954 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4955 ): 4956 this = self._parse_id_var() 4957 4958 if isinstance(this, exp.Identifier): 4959 # We bubble up comments from the Identifier to the Column 4960 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4961 4962 return this 4963 4964 def _parse_colon_as_variant_extract( 4965 self, this: t.Optional[exp.Expression] 4966 ) -> t.Optional[exp.Expression]: 4967 casts = [] 4968 json_path = [] 4969 escape = None 4970 4971 while self._match(TokenType.COLON): 4972 start_index = self._index 4973 4974 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4975 path = self._parse_column_ops( 4976 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4977 ) 4978 4979 # The cast :: operator has a lower precedence than the extraction operator :, so 4980 # we rearrange the AST appropriately to avoid casting the JSON path 4981 while isinstance(path, exp.Cast): 4982 casts.append(path.to) 4983 path = path.this 4984 4985 if casts: 4986 dcolon_offset = next( 4987 i 4988 for i, t in enumerate(self._tokens[start_index:]) 4989 if t.token_type == TokenType.DCOLON 4990 ) 4991 end_token = self._tokens[start_index + dcolon_offset - 1] 4992 else: 4993 end_token = self._prev 4994 4995 if path: 4996 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 4997 # it'll roundtrip to a string literal in GET_PATH 4998 if isinstance(path, exp.Identifier) and path.quoted: 4999 escape = True 5000 5001 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5002 5003 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5004 # Databricks transforms it back to the colon/dot notation 5005 if json_path: 5006 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5007 5008 if json_path_expr: 5009 json_path_expr.set("escape", escape) 5010 5011 this = self.expression( 5012 exp.JSONExtract, 5013 this=this, 5014 expression=json_path_expr, 5015 variant_extract=True, 5016 ) 5017 5018 while casts: 5019 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5020 5021 return this 5022 5023 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5024 return self._parse_types() 5025 5026 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5027 this = self._parse_bracket(this) 5028 5029 while self._match_set(self.COLUMN_OPERATORS): 5030 op_token = self._prev.token_type 5031 op = self.COLUMN_OPERATORS.get(op_token) 5032 5033 if op_token == TokenType.DCOLON: 5034 field = self._parse_dcolon() 5035 if not field: 5036 self.raise_error("Expected type") 5037 elif op and self._curr: 5038 field = self._parse_column_reference() 5039 else: 5040 field = self._parse_field(any_token=True, anonymous_func=True) 5041 5042 if isinstance(field, exp.Func) and this: 5043 # bigquery allows function calls like x.y.count(...) 5044 # SAFE.SUBSTR(...) 5045 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5046 this = exp.replace_tree( 5047 this, 5048 lambda n: ( 5049 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5050 if n.table 5051 else n.this 5052 ) 5053 if isinstance(n, exp.Column) 5054 else n, 5055 ) 5056 5057 if op: 5058 this = op(self, this, field) 5059 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5060 this = self.expression( 5061 exp.Column, 5062 this=field, 5063 table=this.this, 5064 db=this.args.get("table"), 5065 catalog=this.args.get("db"), 5066 ) 5067 else: 5068 this = self.expression(exp.Dot, this=this, expression=field) 5069 5070 this = self._parse_bracket(this) 5071 5072 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5073 5074 def _parse_primary(self) -> t.Optional[exp.Expression]: 5075 if self._match_set(self.PRIMARY_PARSERS): 5076 token_type = self._prev.token_type 5077 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5078 5079 if token_type == TokenType.STRING: 5080 expressions = [primary] 5081 while self._match(TokenType.STRING): 5082 expressions.append(exp.Literal.string(self._prev.text)) 5083 5084 if len(expressions) > 1: 5085 return self.expression(exp.Concat, expressions=expressions) 5086 5087 return primary 5088 5089 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5090 return exp.Literal.number(f"0.{self._prev.text}") 5091 5092 if self._match(TokenType.L_PAREN): 5093 comments = self._prev_comments 5094 query = self._parse_select() 5095 5096 if query: 5097 expressions = [query] 5098 else: 5099 expressions = self._parse_expressions() 5100 5101 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5102 5103 if not this and self._match(TokenType.R_PAREN, advance=False): 5104 this = self.expression(exp.Tuple) 5105 elif 
isinstance(this, exp.UNWRAPPED_QUERIES): 5106 this = self._parse_subquery(this=this, parse_alias=False) 5107 elif isinstance(this, exp.Subquery): 5108 this = self._parse_subquery( 5109 this=self._parse_set_operations(this), parse_alias=False 5110 ) 5111 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5112 this = self.expression(exp.Tuple, expressions=expressions) 5113 else: 5114 this = self.expression(exp.Paren, this=this) 5115 5116 if this: 5117 this.add_comments(comments) 5118 5119 self._match_r_paren(expression=this) 5120 return this 5121 5122 return None 5123 5124 def _parse_field( 5125 self, 5126 any_token: bool = False, 5127 tokens: t.Optional[t.Collection[TokenType]] = None, 5128 anonymous_func: bool = False, 5129 ) -> t.Optional[exp.Expression]: 5130 if anonymous_func: 5131 field = ( 5132 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5133 or self._parse_primary() 5134 ) 5135 else: 5136 field = self._parse_primary() or self._parse_function( 5137 anonymous=anonymous_func, any_token=any_token 5138 ) 5139 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5140 5141 def _parse_function( 5142 self, 5143 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5144 anonymous: bool = False, 5145 optional_parens: bool = True, 5146 any_token: bool = False, 5147 ) -> t.Optional[exp.Expression]: 5148 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5149 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5150 fn_syntax = False 5151 if ( 5152 self._match(TokenType.L_BRACE, advance=False) 5153 and self._next 5154 and self._next.text.upper() == "FN" 5155 ): 5156 self._advance(2) 5157 fn_syntax = True 5158 5159 func = self._parse_function_call( 5160 functions=functions, 5161 anonymous=anonymous, 5162 optional_parens=optional_parens, 5163 any_token=any_token, 5164 ) 5165 5166 if fn_syntax: 5167 self._match(TokenType.R_BRACE) 5168 5169 return func 5170 5171 def _parse_function_call( 5172 self, 5173 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5174 anonymous: bool = False, 5175 optional_parens: bool = True, 5176 any_token: bool = False, 5177 ) -> t.Optional[exp.Expression]: 5178 if not self._curr: 5179 return None 5180 5181 comments = self._curr.comments 5182 token_type = self._curr.token_type 5183 this = self._curr.text 5184 upper = this.upper() 5185 5186 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5187 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5188 self._advance() 5189 return self._parse_window(parser(self)) 5190 5191 if not self._next or self._next.token_type != TokenType.L_PAREN: 5192 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5193 self._advance() 5194 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5195 5196 return None 5197 5198 if any_token: 5199 if token_type in self.RESERVED_TOKENS: 5200 return None 5201 elif token_type not in self.FUNC_TOKENS: 5202 return None 5203 5204 self._advance(2) 5205 5206 parser = self.FUNCTION_PARSERS.get(upper) 5207 if parser and not anonymous: 5208 this = parser(self) 5209 else: 5210 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5211 5212 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5213 this = self.expression(subquery_predicate, this=self._parse_select()) 5214 self._match_r_paren() 5215 return this 5216 5217 if functions is None: 5218 functions = self.FUNCTIONS 5219 5220 function = functions.get(upper) 
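# ------------------------------------------------------------------------
# Usage sketch (editorial, not part of the parser source): two behaviors
# documented in the comments above. In dialects where COLON_IS_VARIANT_EXTRACT
# is set (e.g. Snowflake), `col:path` is parsed into an exp.JSONExtract with
# variant_extract=True, and the ODBC `{fn <function>}` escape is unwrapped
# around the parsed function call. A minimal check, assuming the public
# parse_one API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     assert sqlglot.parse_one("SELECT v:a.b FROM t", read="snowflake").find(exp.JSONExtract)
#     assert sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}").find(exp.Concat)
# ------------------------------------------------------------------------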
5221 5222 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5223 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5224 5225 if alias: 5226 args = self._kv_to_prop_eq(args) 5227 5228 if function and not anonymous: 5229 if "dialect" in function.__code__.co_varnames: 5230 func = function(args, dialect=self.dialect) 5231 else: 5232 func = function(args) 5233 5234 func = self.validate_expression(func, args) 5235 if not self.dialect.NORMALIZE_FUNCTIONS: 5236 func.meta["name"] = this 5237 5238 this = func 5239 else: 5240 if token_type == TokenType.IDENTIFIER: 5241 this = exp.Identifier(this=this, quoted=True) 5242 this = self.expression(exp.Anonymous, this=this, expressions=args) 5243 5244 if isinstance(this, exp.Expression): 5245 this.add_comments(comments) 5246 5247 self._match_r_paren(this) 5248 return self._parse_window(this) 5249 5250 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5251 return expression 5252 5253 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5254 transformed = [] 5255 5256 for index, e in enumerate(expressions): 5257 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5258 if isinstance(e, exp.Alias): 5259 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5260 5261 if not isinstance(e, exp.PropertyEQ): 5262 e = self.expression( 5263 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5264 ) 5265 5266 if isinstance(e.this, exp.Column): 5267 e.this.replace(e.this.this) 5268 else: 5269 e = self._to_prop_eq(e, index) 5270 5271 transformed.append(e) 5272 5273 return transformed 5274 5275 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5276 return self._parse_column_def(self._parse_id_var()) 5277 5278 def _parse_user_defined_function( 5279 self, kind: t.Optional[TokenType] = None 5280 ) -> t.Optional[exp.Expression]: 5281 this = self._parse_id_var() 5282 5283 while self._match(TokenType.DOT): 5284 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5285 5286 if not self._match(TokenType.L_PAREN): 5287 return this 5288 5289 expressions = self._parse_csv(self._parse_function_parameter) 5290 self._match_r_paren() 5291 return self.expression( 5292 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5293 ) 5294 5295 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5296 literal = self._parse_primary() 5297 if literal: 5298 return self.expression(exp.Introducer, this=token.text, expression=literal) 5299 5300 return self.expression(exp.Identifier, this=token.text) 5301 5302 def _parse_session_parameter(self) -> exp.SessionParameter: 5303 kind = None 5304 this = self._parse_id_var() or self._parse_primary() 5305 5306 if this and self._match(TokenType.DOT): 5307 kind = this.name 5308 this = self._parse_var() or self._parse_primary() 5309 5310 return self.expression(exp.SessionParameter, this=this, kind=kind) 5311 5312 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5313 return self._parse_id_var() 5314 5315 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5316 index = self._index 5317 5318 if self._match(TokenType.L_PAREN): 5319 expressions = t.cast( 5320 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5321 ) 5322 5323 if not self._match(TokenType.R_PAREN): 5324 self._retreat(index) 5325 else: 5326 expressions = [self._parse_lambda_arg()] 5327 5328 if self._match_set(self.LAMBDAS): 5329 return 
self.LAMBDAS[self._prev.token_type](self, expressions) 5330 5331 self._retreat(index) 5332 5333 this: t.Optional[exp.Expression] 5334 5335 if self._match(TokenType.DISTINCT): 5336 this = self.expression( 5337 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5338 ) 5339 else: 5340 this = self._parse_select_or_expression(alias=alias) 5341 5342 return self._parse_limit( 5343 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5344 ) 5345 5346 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5347 index = self._index 5348 if not self._match(TokenType.L_PAREN): 5349 return this 5350 5351 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5352 # expr can be of both types 5353 if self._match_set(self.SELECT_START_TOKENS): 5354 self._retreat(index) 5355 return this 5356 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5357 self._match_r_paren() 5358 return self.expression(exp.Schema, this=this, expressions=args) 5359 5360 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5361 return self._parse_column_def(self._parse_field(any_token=True)) 5362 5363 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5364 # column defs are not really columns, they're identifiers 5365 if isinstance(this, exp.Column): 5366 this = this.this 5367 5368 kind = self._parse_types(schema=True) 5369 5370 if self._match_text_seq("FOR", "ORDINALITY"): 5371 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5372 5373 constraints: t.List[exp.Expression] = [] 5374 5375 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5376 ("ALIAS", "MATERIALIZED") 5377 ): 5378 persisted = self._prev.text.upper() == "MATERIALIZED" 5379 constraint_kind = exp.ComputedColumnConstraint( 5380 this=self._parse_assignment(), 5381 persisted=persisted or self._match_text_seq("PERSISTED"), 5382 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5383 ) 5384 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5385 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5386 self._match(TokenType.ALIAS) 5387 constraints.append( 5388 self.expression( 5389 exp.ColumnConstraint, 5390 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5391 ) 5392 ) 5393 5394 while True: 5395 constraint = self._parse_column_constraint() 5396 if not constraint: 5397 break 5398 constraints.append(constraint) 5399 5400 if not kind and not constraints: 5401 return this 5402 5403 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5404 5405 def _parse_auto_increment( 5406 self, 5407 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5408 start = None 5409 increment = None 5410 5411 if self._match(TokenType.L_PAREN, advance=False): 5412 args = self._parse_wrapped_csv(self._parse_bitwise) 5413 start = seq_get(args, 0) 5414 increment = seq_get(args, 1) 5415 elif self._match_text_seq("START"): 5416 start = self._parse_bitwise() 5417 self._match_text_seq("INCREMENT") 5418 increment = self._parse_bitwise() 5419 5420 if start and increment: 5421 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5422 5423 return exp.AutoIncrementColumnConstraint() 5424 5425 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5426 if not self._match_text_seq("REFRESH"): 5427 
self._retreat(self._index - 1) 5428 return None 5429 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5430 5431 def _parse_compress(self) -> exp.CompressColumnConstraint: 5432 if self._match(TokenType.L_PAREN, advance=False): 5433 return self.expression( 5434 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5435 ) 5436 5437 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5438 5439 def _parse_generated_as_identity( 5440 self, 5441 ) -> ( 5442 exp.GeneratedAsIdentityColumnConstraint 5443 | exp.ComputedColumnConstraint 5444 | exp.GeneratedAsRowColumnConstraint 5445 ): 5446 if self._match_text_seq("BY", "DEFAULT"): 5447 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5448 this = self.expression( 5449 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5450 ) 5451 else: 5452 self._match_text_seq("ALWAYS") 5453 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5454 5455 self._match(TokenType.ALIAS) 5456 5457 if self._match_text_seq("ROW"): 5458 start = self._match_text_seq("START") 5459 if not start: 5460 self._match(TokenType.END) 5461 hidden = self._match_text_seq("HIDDEN") 5462 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5463 5464 identity = self._match_text_seq("IDENTITY") 5465 5466 if self._match(TokenType.L_PAREN): 5467 if self._match(TokenType.START_WITH): 5468 this.set("start", self._parse_bitwise()) 5469 if self._match_text_seq("INCREMENT", "BY"): 5470 this.set("increment", self._parse_bitwise()) 5471 if self._match_text_seq("MINVALUE"): 5472 this.set("minvalue", self._parse_bitwise()) 5473 if self._match_text_seq("MAXVALUE"): 5474 this.set("maxvalue", self._parse_bitwise()) 5475 5476 if self._match_text_seq("CYCLE"): 5477 this.set("cycle", True) 5478 elif self._match_text_seq("NO", "CYCLE"): 5479 this.set("cycle", False) 5480 5481 if not identity: 5482 this.set("expression", self._parse_range()) 5483 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5484 args = self._parse_csv(self._parse_bitwise) 5485 this.set("start", seq_get(args, 0)) 5486 this.set("increment", seq_get(args, 1)) 5487 5488 self._match_r_paren() 5489 5490 return this 5491 5492 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5493 self._match_text_seq("LENGTH") 5494 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5495 5496 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5497 if self._match_text_seq("NULL"): 5498 return self.expression(exp.NotNullColumnConstraint) 5499 if self._match_text_seq("CASESPECIFIC"): 5500 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5501 if self._match_text_seq("FOR", "REPLICATION"): 5502 return self.expression(exp.NotForReplicationColumnConstraint) 5503 5504 # Unconsume the `NOT` token 5505 self._retreat(self._index - 1) 5506 return None 5507 5508 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5509 if self._match(TokenType.CONSTRAINT): 5510 this = self._parse_id_var() 5511 else: 5512 this = None 5513 5514 if self._match_texts(self.CONSTRAINT_PARSERS): 5515 return self.expression( 5516 exp.ColumnConstraint, 5517 this=this, 5518 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5519 ) 5520 5521 return this 5522 5523 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5524 if not self._match(TokenType.CONSTRAINT): 5525 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5526 5527 return self.expression( 5528 exp.Constraint, 5529 this=self._parse_id_var(), 5530 expressions=self._parse_unnamed_constraints(), 5531 ) 5532 5533 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5534 constraints = [] 5535 while True: 5536 constraint = self._parse_unnamed_constraint() or self._parse_function() 5537 if not constraint: 5538 break 5539 constraints.append(constraint) 5540 5541 return constraints 5542 5543 def _parse_unnamed_constraint( 5544 self, constraints: t.Optional[t.Collection[str]] = None 5545 ) -> t.Optional[exp.Expression]: 5546 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5547 constraints or self.CONSTRAINT_PARSERS 5548 ): 5549 return None 5550 5551 constraint = self._prev.text.upper() 5552 if constraint not in self.CONSTRAINT_PARSERS: 5553 self.raise_error(f"No parser found for schema constraint {constraint}.") 5554 5555 return self.CONSTRAINT_PARSERS[constraint](self) 5556 5557 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5558 return self._parse_id_var(any_token=False) 5559 5560 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5561 self._match_text_seq("KEY") 5562 return self.expression( 5563 exp.UniqueColumnConstraint, 5564 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5565 this=self._parse_schema(self._parse_unique_key()), 5566 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5567 on_conflict=self._parse_on_conflict(), 5568 ) 5569 5570 def _parse_key_constraint_options(self) -> t.List[str]: 5571 options = [] 5572 while True: 5573 if not self._curr: 5574 break 5575 5576 if self._match(TokenType.ON): 5577 action = None 5578 on = self._advance_any() and self._prev.text 5579 5580 if self._match_text_seq("NO", "ACTION"): 5581 action = "NO ACTION" 5582 elif self._match_text_seq("CASCADE"): 5583 action = "CASCADE" 5584 elif self._match_text_seq("RESTRICT"): 5585 action = "RESTRICT" 5586 elif self._match_pair(TokenType.SET, TokenType.NULL): 5587 action = "SET NULL" 5588 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5589 action = "SET DEFAULT" 5590 else: 5591 self.raise_error("Invalid key constraint") 5592 5593 options.append(f"ON {on} {action}") 5594 else: 5595 var = self._parse_var_from_options( 5596 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5597 ) 5598 if not var: 5599 break 5600 options.append(var.name) 5601 5602 return options 5603 5604 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5605 if match and not self._match(TokenType.REFERENCES): 5606 return None 5607 5608 expressions = None 5609 this = self._parse_table(schema=True) 5610 options = self._parse_key_constraint_options() 5611 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5612 5613 def _parse_foreign_key(self) -> exp.ForeignKey: 5614 expressions = self._parse_wrapped_id_vars() 5615 reference = self._parse_references() 5616 options = {} 5617 5618 while self._match(TokenType.ON): 5619 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5620 self.raise_error("Expected DELETE or UPDATE") 5621 5622 kind = self._prev.text.lower() 5623 5624 if self._match_text_seq("NO", "ACTION"): 5625 action = "NO ACTION" 5626 elif self._match(TokenType.SET): 5627 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5628 action = "SET " + self._prev.text.upper() 5629 else: 5630 self._advance() 5631 action = self._prev.text.upper() 5632 
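# ------------------------------------------------------------------------
# Usage sketch (editorial, not part of the parser source): each ON DELETE /
# ON UPDATE clause parsed in this loop is stored on exp.ForeignKey under a
# lower-cased "delete"/"update" key. A minimal check, assuming the public
# parse_one API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = """
#         CREATE TABLE child (
#             id INT,
#             parent_id INT,
#             FOREIGN KEY (parent_id) REFERENCES parent (id) ON DELETE CASCADE
#         )
#     """
#     fk = sqlglot.parse_one(ddl).find(exp.ForeignKey)
#     assert fk and fk.args.get("delete") == "CASCADE"
# ------------------------------------------------------------------------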
5633 options[kind] = action 5634 5635 return self.expression( 5636 exp.ForeignKey, 5637 expressions=expressions, 5638 reference=reference, 5639 **options, # type: ignore 5640 ) 5641 5642 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5643 return self._parse_field() 5644 5645 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5646 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5647 self._retreat(self._index - 1) 5648 return None 5649 5650 id_vars = self._parse_wrapped_id_vars() 5651 return self.expression( 5652 exp.PeriodForSystemTimeConstraint, 5653 this=seq_get(id_vars, 0), 5654 expression=seq_get(id_vars, 1), 5655 ) 5656 5657 def _parse_primary_key( 5658 self, wrapped_optional: bool = False, in_props: bool = False 5659 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5660 desc = ( 5661 self._match_set((TokenType.ASC, TokenType.DESC)) 5662 and self._prev.token_type == TokenType.DESC 5663 ) 5664 5665 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5666 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5667 5668 expressions = self._parse_wrapped_csv( 5669 self._parse_primary_key_part, optional=wrapped_optional 5670 ) 5671 options = self._parse_key_constraint_options() 5672 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5673 5674 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5675 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5676 5677 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5678 """ 5679 Parses a datetime literal in ODBC format. The literal is parsed into the corresponding 5680 expression type; for example, `{d'yyyy-mm-dd'}` is parsed into a `Date` node, exactly 5681 as `DATE('yyyy-mm-dd')` would be.
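Example (editorial sketch, not from the original docstring; assumes the default dialect routes `{d ...}` through this method):

    import sqlglot
    from sqlglot import exp

    # the ODBC literal should come back as an exp.Date node
    ast = sqlglot.parse_one("SELECT {d '2024-01-01'}")
    assert ast.find(exp.Date)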
5682 5683 Reference: 5684 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5685 """ 5686 self._match(TokenType.VAR) 5687 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5688 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5689 if not self._match(TokenType.R_BRACE): 5690 self.raise_error("Expected }") 5691 return expression 5692 5693 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5694 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5695 return this 5696 5697 bracket_kind = self._prev.token_type 5698 if ( 5699 bracket_kind == TokenType.L_BRACE 5700 and self._curr 5701 and self._curr.token_type == TokenType.VAR 5702 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5703 ): 5704 return self._parse_odbc_datetime_literal() 5705 5706 expressions = self._parse_csv( 5707 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5708 ) 5709 5710 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5711 self.raise_error("Expected ]") 5712 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5713 self.raise_error("Expected }") 5714 5715 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5716 if bracket_kind == TokenType.L_BRACE: 5717 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5718 elif not this: 5719 this = build_array_constructor( 5720 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5721 ) 5722 else: 5723 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5724 if constructor_type: 5725 return build_array_constructor( 5726 constructor_type, 5727 args=expressions, 5728 bracket_kind=bracket_kind, 5729 dialect=self.dialect, 5730 ) 5731 5732 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5733 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5734 5735 self._add_comments(this) 5736 return self._parse_bracket(this) 5737 5738 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5739 if self._match(TokenType.COLON): 5740 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5741 return this 5742 5743 def _parse_case(self) -> t.Optional[exp.Expression]: 5744 ifs = [] 5745 default = None 5746 5747 comments = self._prev_comments 5748 expression = self._parse_assignment() 5749 5750 while self._match(TokenType.WHEN): 5751 this = self._parse_assignment() 5752 self._match(TokenType.THEN) 5753 then = self._parse_assignment() 5754 ifs.append(self.expression(exp.If, this=this, true=then)) 5755 5756 if self._match(TokenType.ELSE): 5757 default = self._parse_assignment() 5758 5759 if not self._match(TokenType.END): 5760 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5761 default = exp.column("interval") 5762 else: 5763 self.raise_error("Expected END after CASE", self._prev) 5764 5765 return self.expression( 5766 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5767 ) 5768 5769 def _parse_if(self) -> t.Optional[exp.Expression]: 5770 if self._match(TokenType.L_PAREN): 5771 args = self._parse_csv(self._parse_assignment) 5772 this = self.validate_expression(exp.If.from_arg_list(args), args) 5773 self._match_r_paren() 5774 else: 5775 index = self._index - 1 5776 5777 if self.NO_PAREN_IF_COMMANDS and index == 0: 5778 
return self._parse_as_command(self._prev) 5779 5780 condition = self._parse_assignment() 5781 5782 if not condition: 5783 self._retreat(index) 5784 return None 5785 5786 self._match(TokenType.THEN) 5787 true = self._parse_assignment() 5788 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5789 self._match(TokenType.END) 5790 this = self.expression(exp.If, this=condition, true=true, false=false) 5791 5792 return this 5793 5794 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5795 if not self._match_text_seq("VALUE", "FOR"): 5796 self._retreat(self._index - 1) 5797 return None 5798 5799 return self.expression( 5800 exp.NextValueFor, 5801 this=self._parse_column(), 5802 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5803 ) 5804 5805 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5806 this = self._parse_function() or self._parse_var_or_string(upper=True) 5807 5808 if self._match(TokenType.FROM): 5809 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5810 5811 if not self._match(TokenType.COMMA): 5812 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5813 5814 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5815 5816 def _parse_gap_fill(self) -> exp.GapFill: 5817 self._match(TokenType.TABLE) 5818 this = self._parse_table() 5819 5820 self._match(TokenType.COMMA) 5821 args = [this, *self._parse_csv(self._parse_lambda)] 5822 5823 gap_fill = exp.GapFill.from_arg_list(args) 5824 return self.validate_expression(gap_fill, args) 5825 5826 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5827 this = self._parse_assignment() 5828 5829 if not self._match(TokenType.ALIAS): 5830 if self._match(TokenType.COMMA): 5831 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5832 5833 self.raise_error("Expected AS after CAST") 5834 5835 fmt = None 5836 to = self._parse_types() 5837 5838 if self._match(TokenType.FORMAT): 5839 fmt_string = self._parse_string() 5840 fmt = self._parse_at_time_zone(fmt_string) 5841 5842 if not to: 5843 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5844 if to.this in exp.DataType.TEMPORAL_TYPES: 5845 this = self.expression( 5846 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5847 this=this, 5848 format=exp.Literal.string( 5849 format_time( 5850 fmt_string.this if fmt_string else "", 5851 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5852 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5853 ) 5854 ), 5855 safe=safe, 5856 ) 5857 5858 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5859 this.set("zone", fmt.args["zone"]) 5860 return this 5861 elif not to: 5862 self.raise_error("Expected TYPE after CAST") 5863 elif isinstance(to, exp.Identifier): 5864 to = exp.DataType.build(to.name, udt=True) 5865 elif to.this == exp.DataType.Type.CHAR: 5866 if self._match(TokenType.CHARACTER_SET): 5867 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5868 5869 return self.expression( 5870 exp.Cast if strict else exp.TryCast, 5871 this=this, 5872 to=to, 5873 format=fmt, 5874 safe=safe, 5875 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5876 ) 5877 5878 def _parse_string_agg(self) -> exp.Expression: 5879 if self._match(TokenType.DISTINCT): 5880 args: t.List[t.Optional[exp.Expression]] = [ 5881 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5882 ] 5883 if self._match(TokenType.COMMA): 5884 args.extend(self._parse_csv(self._parse_assignment)) 5885 else: 5886 args = self._parse_csv(self._parse_assignment) # type: ignore 5887 5888 index = self._index 5889 if not self._match(TokenType.R_PAREN) and args: 5890 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5891 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5892 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5893 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5894 5895 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5896 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5897 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5898 if not self._match_text_seq("WITHIN", "GROUP"): 5899 self._retreat(index) 5900 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5901 5902 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5903 order = self._parse_order(this=seq_get(args, 0)) 5904 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5905 5906 def _parse_convert( 5907 self, strict: bool, safe: t.Optional[bool] = None 5908 ) -> t.Optional[exp.Expression]: 5909 this = self._parse_bitwise() 5910 5911 if self._match(TokenType.USING): 5912 to: t.Optional[exp.Expression] = self.expression( 5913 exp.CharacterSet, this=self._parse_var() 5914 ) 5915 elif self._match(TokenType.COMMA): 5916 to = self._parse_types() 5917 else: 5918 to = None 5919 5920 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5921 5922 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5923 """ 5924 There are generally two variants of the DECODE function: 5925 5926 - DECODE(bin, charset) 5927 - DECODE(expression, search, result [, search, result] ... [, default]) 5928 5929 The second variant will always be parsed into a CASE expression. Note that NULL 5930 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5931 instead of relying on pattern matching. 
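Example (editorial sketch, not from the original docstring):

    import sqlglot

    sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t"
    print(sqlglot.transpile(sql, read="oracle")[0])
    # roughly: SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none'
    # ELSE 'other' END FROM t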
5932 """ 5933 args = self._parse_csv(self._parse_assignment) 5934 5935 if len(args) < 3: 5936 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5937 5938 expression, *expressions = args 5939 if not expression: 5940 return None 5941 5942 ifs = [] 5943 for search, result in zip(expressions[::2], expressions[1::2]): 5944 if not search or not result: 5945 return None 5946 5947 if isinstance(search, exp.Literal): 5948 ifs.append( 5949 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5950 ) 5951 elif isinstance(search, exp.Null): 5952 ifs.append( 5953 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5954 ) 5955 else: 5956 cond = exp.or_( 5957 exp.EQ(this=expression.copy(), expression=search), 5958 exp.and_( 5959 exp.Is(this=expression.copy(), expression=exp.Null()), 5960 exp.Is(this=search.copy(), expression=exp.Null()), 5961 copy=False, 5962 ), 5963 copy=False, 5964 ) 5965 ifs.append(exp.If(this=cond, true=result)) 5966 5967 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5968 5969 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5970 self._match_text_seq("KEY") 5971 key = self._parse_column() 5972 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5973 self._match_text_seq("VALUE") 5974 value = self._parse_bitwise() 5975 5976 if not key and not value: 5977 return None 5978 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5979 5980 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5981 if not this or not self._match_text_seq("FORMAT", "JSON"): 5982 return this 5983 5984 return self.expression(exp.FormatJson, this=this) 5985 5986 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5987 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5988 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5989 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5990 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5991 else: 5992 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5993 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5994 5995 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5996 5997 if not empty and not error and not null: 5998 return None 5999 6000 return self.expression( 6001 exp.OnCondition, 6002 empty=empty, 6003 error=error, 6004 null=null, 6005 ) 6006 6007 def _parse_on_handling( 6008 self, on: str, *values: str 6009 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6010 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6011 for value in values: 6012 if self._match_text_seq(value, "ON", on): 6013 return f"{value} ON {on}" 6014 6015 index = self._index 6016 if self._match(TokenType.DEFAULT): 6017 default_value = self._parse_bitwise() 6018 if self._match_text_seq("ON", on): 6019 return default_value 6020 6021 self._retreat(index) 6022 6023 return None 6024 6025 @t.overload 6026 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6027 6028 @t.overload 6029 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6030 6031 def _parse_json_object(self, agg=False): 6032 star = self._parse_star() 6033 expressions = ( 6034 [star] 6035 if star 6036 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6037 ) 6038 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6039 6040 unique_keys = None 6041 if self._match_text_seq("WITH", "UNIQUE"): 6042 unique_keys = True 6043 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6044 unique_keys = False 6045 6046 self._match_text_seq("KEYS") 6047 6048 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6049 self._parse_type() 6050 ) 6051 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6052 6053 return self.expression( 6054 exp.JSONObjectAgg if agg else exp.JSONObject, 6055 expressions=expressions, 6056 null_handling=null_handling, 6057 unique_keys=unique_keys, 6058 return_type=return_type, 6059 encoding=encoding, 6060 ) 6061 6062 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6063 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6064 if not self._match_text_seq("NESTED"): 6065 this = self._parse_id_var() 6066 kind = self._parse_types(allow_identifiers=False) 6067 nested = None 6068 else: 6069 this = None 6070 kind = None 6071 nested = True 6072 6073 path = self._match_text_seq("PATH") and self._parse_string() 6074 nested_schema = nested and self._parse_json_schema() 6075 6076 return self.expression( 6077 exp.JSONColumnDef, 6078 this=this, 6079 kind=kind, 6080 path=path, 6081 nested_schema=nested_schema, 6082 ) 6083 6084 def _parse_json_schema(self) -> exp.JSONSchema: 6085 self._match_text_seq("COLUMNS") 6086 return self.expression( 6087 exp.JSONSchema, 6088 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6089 ) 6090 6091 def _parse_json_table(self) -> exp.JSONTable: 6092 this = self._parse_format_json(self._parse_bitwise()) 6093 path = self._match(TokenType.COMMA) and self._parse_string() 6094 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6095 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6096 schema = self._parse_json_schema() 6097 6098 return exp.JSONTable( 6099 this=this, 6100 schema=schema, 6101 path=path, 6102 error_handling=error_handling, 6103 empty_handling=empty_handling, 6104 ) 6105 6106 def _parse_match_against(self) -> exp.MatchAgainst: 6107 expressions = self._parse_csv(self._parse_column) 6108 6109 self._match_text_seq(")", "AGAINST", "(") 6110 6111 this = self._parse_string() 6112 6113 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6114 modifier = "IN NATURAL LANGUAGE MODE" 6115 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6116 modifier = f"{modifier} WITH QUERY EXPANSION" 6117 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6118 modifier = "IN BOOLEAN MODE" 6119 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6120 modifier = "WITH QUERY EXPANSION" 6121 else: 6122 modifier = None 6123 6124 return self.expression( 6125 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6126 ) 6127 6128 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6129 def _parse_open_json(self) -> exp.OpenJSON: 6130 this = self._parse_bitwise() 6131 path = self._match(TokenType.COMMA) and self._parse_string() 6132 6133 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6134 this = self._parse_field(any_token=True) 6135 kind = self._parse_types() 6136 path = 
self._parse_string() 6137 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6138 6139 return self.expression( 6140 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6141 ) 6142 6143 expressions = None 6144 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6145 self._match_l_paren() 6146 expressions = self._parse_csv(_parse_open_json_column_def) 6147 6148 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6149 6150 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6151 args = self._parse_csv(self._parse_bitwise) 6152 6153 if self._match(TokenType.IN): 6154 return self.expression( 6155 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6156 ) 6157 6158 if haystack_first: 6159 haystack = seq_get(args, 0) 6160 needle = seq_get(args, 1) 6161 else: 6162 needle = seq_get(args, 0) 6163 haystack = seq_get(args, 1) 6164 6165 return self.expression( 6166 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6167 ) 6168 6169 def _parse_predict(self) -> exp.Predict: 6170 self._match_text_seq("MODEL") 6171 this = self._parse_table() 6172 6173 self._match(TokenType.COMMA) 6174 self._match_text_seq("TABLE") 6175 6176 return self.expression( 6177 exp.Predict, 6178 this=this, 6179 expression=self._parse_table(), 6180 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6181 ) 6182 6183 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6184 args = self._parse_csv(self._parse_table) 6185 return exp.JoinHint(this=func_name.upper(), expressions=args) 6186 6187 def _parse_substring(self) -> exp.Substring: 6188 # Postgres supports the form: substring(string [from int] [for int]) 6189 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6190 6191 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6192 6193 if self._match(TokenType.FROM): 6194 args.append(self._parse_bitwise()) 6195 if self._match(TokenType.FOR): 6196 if len(args) == 1: 6197 args.append(exp.Literal.number(1)) 6198 args.append(self._parse_bitwise()) 6199 6200 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6201 6202 def _parse_trim(self) -> exp.Trim: 6203 # https://www.w3resource.com/sql/character-functions/trim.php 6204 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6205 6206 position = None 6207 collation = None 6208 expression = None 6209 6210 if self._match_texts(self.TRIM_TYPES): 6211 position = self._prev.text.upper() 6212 6213 this = self._parse_bitwise() 6214 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6215 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6216 expression = self._parse_bitwise() 6217 6218 if invert_order: 6219 this, expression = expression, this 6220 6221 if self._match(TokenType.COLLATE): 6222 collation = self._parse_bitwise() 6223 6224 return self.expression( 6225 exp.Trim, this=this, position=position, expression=expression, collation=collation 6226 ) 6227 6228 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6229 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6230 6231 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6232 return self._parse_window(self._parse_id_var(), alias=True) 6233 6234 def _parse_respect_or_ignore_nulls( 6235 self, this: t.Optional[exp.Expression] 6236 ) -> t.Optional[exp.Expression]: 6237 if self._match_text_seq("IGNORE", "NULLS"): 
6238 return self.expression(exp.IgnoreNulls, this=this) 6239 if self._match_text_seq("RESPECT", "NULLS"): 6240 return self.expression(exp.RespectNulls, this=this) 6241 return this 6242 6243 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6244 if self._match(TokenType.HAVING): 6245 self._match_texts(("MAX", "MIN")) 6246 max = self._prev.text.upper() != "MIN" 6247 return self.expression( 6248 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6249 ) 6250 6251 return this 6252 6253 def _parse_window( 6254 self, this: t.Optional[exp.Expression], alias: bool = False 6255 ) -> t.Optional[exp.Expression]: 6256 func = this 6257 comments = func.comments if isinstance(func, exp.Expression) else None 6258 6259 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6260 self._match(TokenType.WHERE) 6261 this = self.expression( 6262 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6263 ) 6264 self._match_r_paren() 6265 6266 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6267 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6268 if self._match_text_seq("WITHIN", "GROUP"): 6269 order = self._parse_wrapped(self._parse_order) 6270 this = self.expression(exp.WithinGroup, this=this, expression=order) 6271 6272 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6273 # Some dialects choose to implement and some do not. 6274 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6275 6276 # There is some code above in _parse_lambda that handles 6277 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6278 6279 # The below changes handle 6280 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6281 6282 # Oracle allows both formats 6283 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6284 # and Snowflake chose to do the same for familiarity 6285 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6286 if isinstance(this, exp.AggFunc): 6287 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6288 6289 if ignore_respect and ignore_respect is not this: 6290 ignore_respect.replace(ignore_respect.this) 6291 this = self.expression(ignore_respect.__class__, this=this) 6292 6293 this = self._parse_respect_or_ignore_nulls(this) 6294 6295 # bigquery select from window x AS (partition by ...) 
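# ------------------------------------------------------------------------
# Usage sketch (editorial, not part of the parser source): per the notes above,
# both IGNORE NULLS placements normalize to the same tree, with exp.IgnoreNulls
# wrapping the aggregate. A minimal check, assuming the public parse_one API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     for sql in (
#         "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
#         "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
#     ):
#         node = sqlglot.parse_one(sql, read="snowflake").find(exp.IgnoreNulls)
#         assert node and isinstance(node.this, exp.FirstValue)
# ------------------------------------------------------------------------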
6296 if alias: 6297 over = None 6298 self._match(TokenType.ALIAS) 6299 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6300 return this 6301 else: 6302 over = self._prev.text.upper() 6303 6304 if comments and isinstance(func, exp.Expression): 6305 func.pop_comments() 6306 6307 if not self._match(TokenType.L_PAREN): 6308 return self.expression( 6309 exp.Window, 6310 comments=comments, 6311 this=this, 6312 alias=self._parse_id_var(False), 6313 over=over, 6314 ) 6315 6316 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6317 6318 first = self._match(TokenType.FIRST) 6319 if self._match_text_seq("LAST"): 6320 first = False 6321 6322 partition, order = self._parse_partition_and_order() 6323 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6324 6325 if kind: 6326 self._match(TokenType.BETWEEN) 6327 start = self._parse_window_spec() 6328 self._match(TokenType.AND) 6329 end = self._parse_window_spec() 6330 6331 spec = self.expression( 6332 exp.WindowSpec, 6333 kind=kind, 6334 start=start["value"], 6335 start_side=start["side"], 6336 end=end["value"], 6337 end_side=end["side"], 6338 ) 6339 else: 6340 spec = None 6341 6342 self._match_r_paren() 6343 6344 window = self.expression( 6345 exp.Window, 6346 comments=comments, 6347 this=this, 6348 partition_by=partition, 6349 order=order, 6350 spec=spec, 6351 alias=window_alias, 6352 over=over, 6353 first=first, 6354 ) 6355 6356 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6357 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6358 return self._parse_window(window, alias=alias) 6359 6360 return window 6361 6362 def _parse_partition_and_order( 6363 self, 6364 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6365 return self._parse_partition_by(), self._parse_order() 6366 6367 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6368 self._match(TokenType.BETWEEN) 6369 6370 return { 6371 "value": ( 6372 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6373 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6374 or self._parse_bitwise() 6375 ), 6376 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6377 } 6378 6379 def _parse_alias( 6380 self, this: t.Optional[exp.Expression], explicit: bool = False 6381 ) -> t.Optional[exp.Expression]: 6382 any_token = self._match(TokenType.ALIAS) 6383 comments = self._prev_comments or [] 6384 6385 if explicit and not any_token: 6386 return this 6387 6388 if self._match(TokenType.L_PAREN): 6389 aliases = self.expression( 6390 exp.Aliases, 6391 comments=comments, 6392 this=this, 6393 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6394 ) 6395 self._match_r_paren(aliases) 6396 return aliases 6397 6398 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6399 self.STRING_ALIASES and self._parse_string_as_identifier() 6400 ) 6401 6402 if alias: 6403 comments.extend(alias.pop_comments()) 6404 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6405 column = this.this 6406 6407 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6408 if not this.comments and column and column.comments: 6409 this.comments = column.pop_comments() 6410 6411 return this 6412 6413 def _parse_id_var( 6414 self, 6415 any_token: bool = True, 6416 tokens: t.Optional[t.Collection[TokenType]] = None, 6417 ) -> t.Optional[exp.Expression]: 6418 expression = self._parse_identifier() 6419 if 
not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
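The helpers above are tiny combinators: _parse_csv parses one item and then keeps consuming separator-plus-item pairs, while _parse_wrapped handles optional parentheses around any sub-parse. The following self-contained sketch (illustrative toy code, not part of sqlglot) mirrors the _parse_csv loop over a plain token list:

    # Toy model of the Parser cursor plus the _parse_csv combinator
    class ToyParser:
        def __init__(self, tokens):
            self._tokens = tokens
            self._index = 0

        def _match(self, token):
            # Advance only when the current token is the expected one
            if self._index < len(self._tokens) and self._tokens[self._index] == token:
                self._index += 1
                return True
            return False

        def _parse_item(self):
            # "Parse" an item by consuming any token that is not a separator
            if self._index < len(self._tokens) and self._tokens[self._index] != ",":
                self._index += 1
                return self._tokens[self._index - 1]
            return None

        def parse_csv(self):
            # Mirrors Parser._parse_csv: one item, then (sep, item) pairs
            items = []
            item = self._parse_item()
            if item is not None:
                items.append(item)
            while self._match(","):
                item = self._parse_item()
                if item is not None:
                    items.append(item)
            return items

    print(ToyParser(["a", ",", "b", ",", "c"]).parse_csv())  # ['a', 'b', 'c']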
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._match(TokenType.RETURNING) and self._parse_csv(self._parse_bitwise),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
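To see what _parse_merge and _parse_when_matched build, it helps to parse a MERGE statement through the public API. In the code above, the parsed WHEN branches are stored under the Merge node's "expressions" arg; a small example (node layout may differ in other sqlglot versions):

    import sqlglot

    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )

    # One exp.When per WHEN branch, as produced by _parse_when_matched
    for when in merge.args["expressions"]:
        print(when.args.get("matched"), type(when.args.get("then")).__name__)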
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
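The _match* primitives above share one discipline: remember self._index, consume tokens optimistically, and _retreat on failure so that an unmatched branch leaves the token stream untouched. A minimal standalone sketch of that save-and-restore pattern (toy code, not sqlglot itself):

    class ToyMatcher:
        def __init__(self, tokens):
            self._tokens = tokens
            self._index = 0

        def _match_text_seq(self, *texts):
            # Mirrors Parser._match_text_seq: match the whole keyword
            # sequence or roll back to the saved index
            index = self._index
            for text in texts:
                if self._index < len(self._tokens) and self._tokens[self._index].upper() == text:
                    self._index += 1
                else:
                    self._index = index  # retreat: no partial consumption
                    return None
            return True

    m = ToyMatcher(["set", "not", "null"])
    assert not m._match_text_seq("SET", "DEFAULT")  # fails and rolls back
    assert m._index == 0
    assert m._match_text_seq("SET", "NOT", "NULL")  # consumes all three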
    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> exp.Star | exp.UnpackColumns:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            return exp.UnpackColumns(this=self._parse_function())

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )
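_find_parser above resolves multi-word commands (for example, SHOW ... variants) by walking a keyword trie one token at a time, using the same sqlglot.trie helpers this module imports. A short demonstration with made-up keyword sequences:

    from sqlglot.trie import TrieResult, in_trie, new_trie

    trie = new_trie([("SHOW", "TABLES"), ("SHOW", "COLUMNS")])

    result, node = in_trie(trie, ["SHOW"])
    assert result == TrieResult.PREFIX  # partial match: keep consuming tokens

    result, node = in_trie(node, ["TABLES"])
    assert result == TrieResult.EXISTS  # full key found: dispatch its parser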
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
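Because the dialect argument goes through Dialect.get_or_raise, a Parser can be constructed from a dialect name just as easily as from a Dialect class or instance; for example:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to five errors before raising, parse as DuckDB
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")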
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
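In practice, parse is fed tokens from the Tokenizer, and passing the original sql string back in produces more helpful error messages. A minimal end-to-end run with the default tokenizer and parser:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT 1"
    tokens = Tokenizer().tokenize(sql)

    trees = Parser().parse(tokens, sql=sql)  # one tree per statement
    print([tree.sql() for tree in trees])  # ['SELECT a FROM t', 'SELECT 1']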
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
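The same machinery is reachable through the higher-level sqlglot.parse_one(sql, into=...), which delegates to parse_into; when parsing fails, each recorded error carries the attempted type under "into_expression". A brief example:

    import sqlglot
    from sqlglot import exp
    from sqlglot.errors import ParseError

    select = sqlglot.parse_one("SELECT 1", into=exp.Select)

    try:
        sqlglot.parse_one("DROP TABLE t", into=exp.Select)  # not a SELECT
    except ParseError as e:
        print(e.errors[0].get("into_expression"))  # exp.Select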
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
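A short illustration of the WARN level, which logs the recorded errors through the module logger instead of raising, while still returning a best-effort tree (the malformed statement is just an example; what gets recorded may vary):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # incomplete arithmetic expression
    tokens = Tokenizer().tokenize(sql)

    parser = Parser(error_level=ErrorLevel.WARN)
    trees = parser.parse(tokens, sql=sql)  # logs instead of raising
    print(len(parser.errors))  # the recorded ParseError instances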
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
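Since raise_error attaches structured context to each ParseError, callers can recover the line, column, and surrounding snippet programmatically, which is useful for editor tooling; for instance (assuming the statement below fails to parse, as it should):

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT * FROM t WHERE")  # WHERE without a condition
    except ParseError as e:
        info = e.errors[0]
        print(info["line"], info["col"], info["description"])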
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
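Inside parse methods, and in dialect subclasses, expression is the standard way to construct nodes because it attaches any pending comments and validates in a single step. It is roughly direct construction plus validation:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Like exp.Column(this=...), plus comment bookkeeping and validation
    column = parser.expression(exp.Column, this=exp.to_identifier("x"))
    print(column.sql())  # x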
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
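A quick check of the validation behavior: exp.Add declares both operands as mandatory, so validating a node that is missing one should raise at the default IMMEDIATE error level (a hedged sketch; the exact message text may differ):

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()
    try:
        parser.validate_expression(exp.Add(this=exp.column("x")))  # no 'expression'
    except ParseError as e:
        print(e)  # e.g. Required keyword: 'expression' missing for Add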