sqlglot.parser
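
A quick orientation before the source (a minimal sketch, not part of the module): the Parser below consumes the token stream produced by the Tokenizer, and the public entry points sqlglot.parse / sqlglot.parse_one wire the two together. Driving it directly looks roughly like:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)   # List[Token]
    trees = Parser().parse(tokens, sql)  # one syntax tree per statement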

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
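
# An illustrative aside (assumes the public sqlglot API, which is not shown in
# this module): build_logarithm is why one-argument LOG(x) parses to Ln for
# dialects whose parser sets LOG_DEFAULTS_TO_LN, while the two-argument form
# respects the dialect's LOG_BASE_FIRST argument order:
#
#     import sqlglot
#     sqlglot.parse_one("LOG(x)", read="mysql")     # -> exp.Ln (MySQL LOG(x) = ln x)
#     sqlglot.parse_one("LOG(2, x)", read="mysql")  # -> exp.Log with the base first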

def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
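
# Illustrative aside (the trie shape here is an assumption based on sqlglot.trie,
# which is not shown in this file): new_trie indexes the space-split keys of
# SHOW_PARSERS and SET_PARSERS so multi-word commands can be matched one token
# at a time, roughly:
#
#     new_trie(key.split(" ") for key in ("GLOBAL", "CLUSTER BY"))
#     # ~> {"GLOBAL": {0: True}, "CLUSTER": {"BY": {0: True}}}
#
# where the sentinel key 0 marks the end of a complete keyword sequence.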

class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            interval=seq_get(args, 2)
            or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
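
    # A hedged sketch of how dialects hook into this table (the names below are
    # hypothetical, not part of sqlglot): subclasses extend FUNCTIONS so that a
    # function name maps to a builder receiving the parsed argument list (and,
    # for two-argument callables, the active dialect):
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MYFUNC": lambda args: exp.Anonymous(this="MYFUNC", expressions=args),
    #         }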

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
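
    # Illustrative note: because so many keyword tokens are listed in
    # ID_VAR_TOKENS, they remain usable as ordinary identifiers wherever the
    # grammar allows it, e.g. (assuming the public API):
    #
    #     sqlglot.parse_one("SELECT format FROM t")  # `format` parses as a column name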

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
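
    # Illustrative note: the token-to-node tables above (EQUALITY, COMPARISON,
    # BITWISE, TERM, FACTOR, ...) drive a conventional precedence-climbing
    # expression parser, so standard operator precedence falls out of the table
    # layering rather than an explicit grammar, e.g.:
    #
    #     sqlglot.parse_one("1 + 2 * 3")
    #     # -> Add(this=1, expression=Mul(this=2, expression=3)), since FACTOR
    #     #    (Mul) binds tighter than TERM (Add)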

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
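
    # Illustrative note: EXPRESSION_PARSERS is what backs parse_into below (and
    # the `into=` argument of the public sqlglot.parse_one), mapping a target
    # node type to the method that can produce it:
    #
    #     sqlglot.parse_one("a = 1", into=exp.Condition)  # routed via _parse_assignment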

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
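
    # Illustrative note: RANGE_PARSERS fire after the left operand of a
    # comparison has been parsed, consuming trailing predicates, e.g.:
    #
    #     sqlglot.parse_one("x BETWEEN 1 AND 2")  # -> exp.Between via _parse_between
    #     sqlglot.parse_one("x IN (1, 2)")        # -> exp.In via _parse_in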

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
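
    # Illustrative note (assuming the public API): these handlers turn
    # column-level DDL clauses into *ColumnConstraint nodes, e.g.:
    #
    #     sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)")
    #     # -> the ColumnDef for x carries NotNullColumnConstraint and
    #     #    DefaultColumnConstraint entries in its constraints list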

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
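
    # Illustrative note: FUNCTION_PARSERS covers functions whose argument syntax
    # is not a plain comma-separated list and thus needs a dedicated method:
    #
    #     sqlglot.parse_one("CAST(a AS TEXT)")       # _parse_cast handles `AS <type>`
    #     sqlglot.parse_one("EXTRACT(YEAR FROM d)")  # _parse_extract handles `FROM`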

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }
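
    # Illustrative note on the OPTIONS_TYPE shape used above and below: each key
    # is a leading keyword and each value enumerates the keyword sequences that
    # may follow it, so TRANSACTION_CHARACTERISTICS matches phrases such as
    # ISOLATION LEVEL READ COMMITTED or READ ONLY one token at a time (see
    # _parse_var_from_options).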

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
1371 """ 1372 errors = [] 1373 for expression_type in ensure_list(expression_types): 1374 parser = self.EXPRESSION_PARSERS.get(expression_type) 1375 if not parser: 1376 raise TypeError(f"No parser registered for {expression_type}") 1377 1378 try: 1379 return self._parse(parser, raw_tokens, sql) 1380 except ParseError as e: 1381 e.errors[0]["into_expression"] = expression_type 1382 errors.append(e) 1383 1384 raise ParseError( 1385 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1386 errors=merge_errors(errors), 1387 ) from errors[-1] 1388 1389 def _parse( 1390 self, 1391 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1392 raw_tokens: t.List[Token], 1393 sql: t.Optional[str] = None, 1394 ) -> t.List[t.Optional[exp.Expression]]: 1395 self.reset() 1396 self.sql = sql or "" 1397 1398 total = len(raw_tokens) 1399 chunks: t.List[t.List[Token]] = [[]] 1400 1401 for i, token in enumerate(raw_tokens): 1402 if token.token_type == TokenType.SEMICOLON: 1403 if token.comments: 1404 chunks.append([token]) 1405 1406 if i < total - 1: 1407 chunks.append([]) 1408 else: 1409 chunks[-1].append(token) 1410 1411 expressions = [] 1412 1413 for tokens in chunks: 1414 self._index = -1 1415 self._tokens = tokens 1416 self._advance() 1417 1418 expressions.append(parse_method(self)) 1419 1420 if self._index < len(self._tokens): 1421 self.raise_error("Invalid expression / Unexpected token") 1422 1423 self.check_errors() 1424 1425 return expressions 1426 1427 def check_errors(self) -> None: 1428 """Logs or raises any found errors, depending on the chosen error level setting.""" 1429 if self.error_level == ErrorLevel.WARN: 1430 for error in self.errors: 1431 logger.error(str(error)) 1432 elif self.error_level == ErrorLevel.RAISE and self.errors: 1433 raise ParseError( 1434 concat_messages(self.errors, self.max_errors), 1435 errors=merge_errors(self.errors), 1436 ) 1437 1438 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1439 """ 1440 Appends an error in the list of recorded errors or raises it, depending on the chosen 1441 error level setting. 1442 """ 1443 token = token or self._curr or self._prev or Token.string("") 1444 start = token.start 1445 end = token.end + 1 1446 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1447 highlight = self.sql[start:end] 1448 end_context = self.sql[end : end + self.error_message_context] 1449 1450 error = ParseError.new( 1451 f"{message}. Line {token.line}, Col: {token.col}.\n" 1452 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1453 description=message, 1454 line=token.line, 1455 col=token.col, 1456 start_context=start_context, 1457 highlight=highlight, 1458 end_context=end_context, 1459 ) 1460 1461 if self.error_level == ErrorLevel.IMMEDIATE: 1462 raise error 1463 1464 self.errors.append(error) 1465 1466 def expression( 1467 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1468 ) -> E: 1469 """ 1470 Creates a new, validated Expression. 1471 1472 Args: 1473 exp_class: The expression class to instantiate. 1474 comments: An optional list of comments to attach to the expression. 1475 kwargs: The arguments to set for the expression along with their respective values. 1476 1477 Returns: 1478 The target expression. 
1479 """ 1480 instance = exp_class(**kwargs) 1481 instance.add_comments(comments) if comments else self._add_comments(instance) 1482 return self.validate_expression(instance) 1483 1484 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1485 if expression and self._prev_comments: 1486 expression.add_comments(self._prev_comments) 1487 self._prev_comments = None 1488 1489 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1490 """ 1491 Validates an Expression, making sure that all its mandatory arguments are set. 1492 1493 Args: 1494 expression: The expression to validate. 1495 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1496 1497 Returns: 1498 The validated expression. 1499 """ 1500 if self.error_level != ErrorLevel.IGNORE: 1501 for error_message in expression.error_messages(args): 1502 self.raise_error(error_message) 1503 1504 return expression 1505 1506 def _find_sql(self, start: Token, end: Token) -> str: 1507 return self.sql[start.start : end.end + 1] 1508 1509 def _is_connected(self) -> bool: 1510 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1511 1512 def _advance(self, times: int = 1) -> None: 1513 self._index += times 1514 self._curr = seq_get(self._tokens, self._index) 1515 self._next = seq_get(self._tokens, self._index + 1) 1516 1517 if self._index > 0: 1518 self._prev = self._tokens[self._index - 1] 1519 self._prev_comments = self._prev.comments 1520 else: 1521 self._prev = None 1522 self._prev_comments = None 1523 1524 def _retreat(self, index: int) -> None: 1525 if index != self._index: 1526 self._advance(index - self._index) 1527 1528 def _warn_unsupported(self) -> None: 1529 if len(self._tokens) <= 1: 1530 return 1531 1532 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1533 # interested in emitting a warning for the one being currently processed. 1534 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1535 1536 logger.warning( 1537 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1538 ) 1539 1540 def _parse_command(self) -> exp.Command: 1541 self._warn_unsupported() 1542 return self.expression( 1543 exp.Command, 1544 comments=self._prev_comments, 1545 this=self._prev.text.upper(), 1546 expression=self._parse_string(), 1547 ) 1548 1549 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1550 """ 1551 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
self._prev 1661 temporary = self._match(TokenType.TEMPORARY) 1662 materialized = self._match_text_seq("MATERIALIZED") 1663 1664 kind = self._match_set(self.CREATABLES) and self._prev.text 1665 if not kind: 1666 return self._parse_as_command(start) 1667 1668 if_exists = exists or self._parse_exists() 1669 table = self._parse_table_parts( 1670 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1671 ) 1672 1673 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1674 1675 if self._match(TokenType.L_PAREN, advance=False): 1676 expressions = self._parse_wrapped_csv(self._parse_types) 1677 else: 1678 expressions = None 1679 1680 return self.expression( 1681 exp.Drop, 1682 comments=start.comments, 1683 exists=if_exists, 1684 this=table, 1685 expressions=expressions, 1686 kind=kind.upper(), 1687 temporary=temporary, 1688 materialized=materialized, 1689 cascade=self._match_text_seq("CASCADE"), 1690 constraints=self._match_text_seq("CONSTRAINTS"), 1691 purge=self._match_text_seq("PURGE"), 1692 cluster=cluster, 1693 ) 1694 1695 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1696 return ( 1697 self._match_text_seq("IF") 1698 and (not not_ or self._match(TokenType.NOT)) 1699 and self._match(TokenType.EXISTS) 1700 ) 1701 1702 def _parse_create(self) -> exp.Create | exp.Command: 1703 # Note: this can't be None because we've matched a statement parser 1704 start = self._prev 1705 comments = self._prev_comments 1706 1707 replace = ( 1708 start.token_type == TokenType.REPLACE 1709 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1710 or self._match_pair(TokenType.OR, TokenType.ALTER) 1711 ) 1712 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1713 1714 unique = self._match(TokenType.UNIQUE) 1715 1716 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1717 clustered = True 1718 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1719 "COLUMNSTORE" 1720 ): 1721 clustered = False 1722 else: 1723 clustered = None 1724 1725 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1726 self._advance() 1727 1728 properties = None 1729 create_token = self._match_set(self.CREATABLES) and self._prev 1730 1731 if not create_token: 1732 # exp.Properties.Location.POST_CREATE 1733 properties = self._parse_properties() 1734 create_token = self._match_set(self.CREATABLES) and self._prev 1735 1736 if not properties or not create_token: 1737 return self._parse_as_command(start) 1738 1739 concurrently = self._match_text_seq("CONCURRENTLY") 1740 exists = self._parse_exists(not_=True) 1741 this = None 1742 expression: t.Optional[exp.Expression] = None 1743 indexes = None 1744 no_schema_binding = None 1745 begin = None 1746 end = None 1747 clone = None 1748 1749 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1750 nonlocal properties 1751 if properties and temp_props: 1752 properties.expressions.extend(temp_props.expressions) 1753 elif temp_props: 1754 properties = temp_props 1755 1756 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1757 this = self._parse_user_defined_function(kind=create_token.token_type) 1758 1759 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1760 extend_props(self._parse_properties()) 1761 1762 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1763 extend_props(self._parse_properties()) 1764 1765 if not expression: 1766 if self._match(TokenType.COMMAND): 1767 expression = 
self._parse_as_command(self._prev) 1768 else: 1769 begin = self._match(TokenType.BEGIN) 1770 return_ = self._match_text_seq("RETURN") 1771 1772 if self._match(TokenType.STRING, advance=False): 1773 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1774 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1775 expression = self._parse_string() 1776 extend_props(self._parse_properties()) 1777 else: 1778 expression = self._parse_statement() 1779 1780 end = self._match_text_seq("END") 1781 1782 if return_: 1783 expression = self.expression(exp.Return, this=expression) 1784 elif create_token.token_type == TokenType.INDEX: 1785 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1786 if not self._match(TokenType.ON): 1787 index = self._parse_id_var() 1788 anonymous = False 1789 else: 1790 index = None 1791 anonymous = True 1792 1793 this = self._parse_index(index=index, anonymous=anonymous) 1794 elif create_token.token_type in self.DB_CREATABLES: 1795 table_parts = self._parse_table_parts( 1796 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1797 ) 1798 1799 # exp.Properties.Location.POST_NAME 1800 self._match(TokenType.COMMA) 1801 extend_props(self._parse_properties(before=True)) 1802 1803 this = self._parse_schema(this=table_parts) 1804 1805 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1806 extend_props(self._parse_properties()) 1807 1808 self._match(TokenType.ALIAS) 1809 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1810 # exp.Properties.Location.POST_ALIAS 1811 extend_props(self._parse_properties()) 1812 1813 if create_token.token_type == TokenType.SEQUENCE: 1814 expression = self._parse_types() 1815 extend_props(self._parse_properties()) 1816 else: 1817 expression = self._parse_ddl_select() 1818 1819 if create_token.token_type == TokenType.TABLE: 1820 # exp.Properties.Location.POST_EXPRESSION 1821 extend_props(self._parse_properties()) 1822 1823 indexes = [] 1824 while True: 1825 index = self._parse_index() 1826 1827 # exp.Properties.Location.POST_INDEX 1828 extend_props(self._parse_properties()) 1829 if not index: 1830 break 1831 else: 1832 self._match(TokenType.COMMA) 1833 indexes.append(index) 1834 elif create_token.token_type == TokenType.VIEW: 1835 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1836 no_schema_binding = True 1837 1838 shallow = self._match_text_seq("SHALLOW") 1839 1840 if self._match_texts(self.CLONE_KEYWORDS): 1841 copy = self._prev.text.lower() == "copy" 1842 clone = self.expression( 1843 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1844 ) 1845 1846 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1847 return self._parse_as_command(start) 1848 1849 return self.expression( 1850 exp.Create, 1851 comments=comments, 1852 this=this, 1853 kind=create_token.text.upper(), 1854 replace=replace, 1855 refresh=refresh, 1856 unique=unique, 1857 expression=expression, 1858 exists=exists, 1859 properties=properties, 1860 indexes=indexes, 1861 no_schema_binding=no_schema_binding, 1862 begin=begin, 1863 end=end, 1864 clone=clone, 1865 concurrently=concurrently, 1866 clustered=clustered, 1867 ) 1868 1869 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1870 seq = exp.SequenceProperties() 1871 1872 options = [] 1873 index = self._index 1874 1875 while self._curr: 1876 self._match(TokenType.COMMA) 1877 if
self._match_text_seq("INCREMENT"): 1878 self._match_text_seq("BY") 1879 self._match_text_seq("=") 1880 seq.set("increment", self._parse_term()) 1881 elif self._match_text_seq("MINVALUE"): 1882 seq.set("minvalue", self._parse_term()) 1883 elif self._match_text_seq("MAXVALUE"): 1884 seq.set("maxvalue", self._parse_term()) 1885 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1886 self._match_text_seq("=") 1887 seq.set("start", self._parse_term()) 1888 elif self._match_text_seq("CACHE"): 1889 # T-SQL allows empty CACHE which is initialized dynamically 1890 seq.set("cache", self._parse_number() or True) 1891 elif self._match_text_seq("OWNED", "BY"): 1892 # "OWNED BY NONE" is the default 1893 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1894 else: 1895 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1896 if opt: 1897 options.append(opt) 1898 else: 1899 break 1900 1901 seq.set("options", options if options else None) 1902 return None if self._index == index else seq 1903 1904 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1905 # only used for teradata currently 1906 self._match(TokenType.COMMA) 1907 1908 kwargs = { 1909 "no": self._match_text_seq("NO"), 1910 "dual": self._match_text_seq("DUAL"), 1911 "before": self._match_text_seq("BEFORE"), 1912 "default": self._match_text_seq("DEFAULT"), 1913 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1914 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1915 "after": self._match_text_seq("AFTER"), 1916 "minimum": self._match_texts(("MIN", "MINIMUM")), 1917 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1918 } 1919 1920 if self._match_texts(self.PROPERTY_PARSERS): 1921 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1922 try: 1923 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1924 except TypeError: 1925 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1926 1927 return None 1928 1929 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1930 return self._parse_wrapped_csv(self._parse_property) 1931 1932 def _parse_property(self) -> t.Optional[exp.Expression]: 1933 if self._match_texts(self.PROPERTY_PARSERS): 1934 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1935 1936 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1937 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1938 1939 if self._match_text_seq("COMPOUND", "SORTKEY"): 1940 return self._parse_sortkey(compound=True) 1941 1942 if self._match_text_seq("SQL", "SECURITY"): 1943 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1944 1945 index = self._index 1946 key = self._parse_column() 1947 1948 if not self._match(TokenType.EQ): 1949 self._retreat(index) 1950 return self._parse_sequence_properties() 1951 1952 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1953 if isinstance(key, exp.Column): 1954 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1955 1956 value = self._parse_bitwise() or self._parse_var(any_token=True) 1957 1958 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1959 if isinstance(value, exp.Column): 1960 value = exp.var(value.name) 1961 1962 return self.expression(exp.Property, this=key, value=value) 1963 1964 def _parse_stored(self) -> exp.FileFormatProperty: 1965 self._match(TokenType.ALIAS) 1966 1967 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1968 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1969 1970 return self.expression( 1971 exp.FileFormatProperty, 1972 this=( 1973 self.expression( 1974 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1975 ) 1976 if input_format or output_format 1977 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1978 ), 1979 ) 1980 1981 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1982 field = self._parse_field() 1983 if isinstance(field, exp.Identifier) and not field.quoted: 1984 field = exp.var(field) 1985 1986 return field 1987 1988 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1989 self._match(TokenType.EQ) 1990 self._match(TokenType.ALIAS) 1991 1992 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1993 1994 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1995 properties = [] 1996 while True: 1997 if before: 1998 prop = self._parse_property_before() 1999 else: 2000 prop = self._parse_property() 2001 if not prop: 2002 break 2003 for p in ensure_list(prop): 2004 properties.append(p) 2005 2006 if properties: 2007 return self.expression(exp.Properties, expressions=properties) 2008 2009 return None 2010 2011 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2012 return self.expression( 2013 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2014 ) 2015 2016 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2017 if self._index >= 2: 2018 pre_volatile_token = self._tokens[self._index - 2] 2019 else: 2020 pre_volatile_token = None 2021 2022 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2023 return exp.VolatileProperty() 2024 2025 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2026 2027 def _parse_retention_period(self) -> exp.Var: 2028 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2029 number = self._parse_number() 2030 number_str = f"{number} " if number else "" 2031 unit = self._parse_var(any_token=True) 2032 return exp.var(f"{number_str}{unit}") 2033 2034 def _parse_system_versioning_property( 2035 self, with_: bool = False 2036 ) -> exp.WithSystemVersioningProperty: 2037 self._match(TokenType.EQ) 2038 prop = self.expression( 2039 exp.WithSystemVersioningProperty, 2040 **{ # type: ignore 2041 "on": True, 2042 "with": with_, 2043 }, 2044 ) 2045 2046 if self._match_text_seq("OFF"): 2047 prop.set("on", False) 2048 return prop 2049 2050 self._match(TokenType.ON) 2051 if self._match(TokenType.L_PAREN): 2052 while self._curr and not self._match(TokenType.R_PAREN): 2053 if self._match_text_seq("HISTORY_TABLE", "="): 2054 prop.set("this", self._parse_table_parts()) 2055 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2056 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2057 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2058 prop.set("retention_period", self._parse_retention_period()) 2059 2060 self._match(TokenType.COMMA) 2061 2062 return prop 2063 2064 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2065 self._match(TokenType.EQ) 2066 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2067 prop = 
self.expression(exp.DataDeletionProperty, on=on) 2068 2069 if self._match(TokenType.L_PAREN): 2070 while self._curr and not self._match(TokenType.R_PAREN): 2071 if self._match_text_seq("FILTER_COLUMN", "="): 2072 prop.set("filter_column", self._parse_column()) 2073 elif self._match_text_seq("RETENTION_PERIOD", "="): 2074 prop.set("retention_period", self._parse_retention_period()) 2075 2076 self._match(TokenType.COMMA) 2077 2078 return prop 2079 2080 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2081 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2082 prop = self._parse_system_versioning_property(with_=True) 2083 self._match_r_paren() 2084 return prop 2085 2086 if self._match(TokenType.L_PAREN, advance=False): 2087 return self._parse_wrapped_properties() 2088 2089 if self._match_text_seq("JOURNAL"): 2090 return self._parse_withjournaltable() 2091 2092 if self._match_texts(self.VIEW_ATTRIBUTES): 2093 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2094 2095 if self._match_text_seq("DATA"): 2096 return self._parse_withdata(no=False) 2097 elif self._match_text_seq("NO", "DATA"): 2098 return self._parse_withdata(no=True) 2099 2100 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2101 return self._parse_serde_properties(with_=True) 2102 2103 if self._match(TokenType.SCHEMA): 2104 return self.expression( 2105 exp.WithSchemaBindingProperty, 2106 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2107 ) 2108 2109 if not self._next: 2110 return None 2111 2112 return self._parse_withisolatedloading() 2113 2114 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2115 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2116 self._match(TokenType.EQ) 2117 2118 user = self._parse_id_var() 2119 self._match(TokenType.PARAMETER) 2120 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2121 2122 if not user or not host: 2123 return None 2124 2125 return exp.DefinerProperty(this=f"{user}@{host}") 2126 2127 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2128 self._match(TokenType.TABLE) 2129 self._match(TokenType.EQ) 2130 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2131 2132 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2133 return self.expression(exp.LogProperty, no=no) 2134 2135 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2136 return self.expression(exp.JournalProperty, **kwargs) 2137 2138 def _parse_checksum(self) -> exp.ChecksumProperty: 2139 self._match(TokenType.EQ) 2140 2141 on = None 2142 if self._match(TokenType.ON): 2143 on = True 2144 elif self._match_text_seq("OFF"): 2145 on = False 2146 2147 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2148 2149 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2150 return self.expression( 2151 exp.Cluster, 2152 expressions=( 2153 self._parse_wrapped_csv(self._parse_ordered) 2154 if wrapped 2155 else self._parse_csv(self._parse_ordered) 2156 ), 2157 ) 2158 2159 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2160 self._match_text_seq("BY") 2161 2162 self._match_l_paren() 2163 expressions = self._parse_csv(self._parse_column) 2164 self._match_r_paren() 2165 2166 if self._match_text_seq("SORTED", "BY"): 2167 self._match_l_paren() 2168 sorted_by = self._parse_csv(self._parse_ordered) 2169 self._match_r_paren() 2170 else: 2171 sorted_by = None 2172 2173 
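# ----------------------------------------------------------------------
# [Editor's aside] A sketch of the bucketing clause this method parses,
# assuming the Hive dialect accepts this form:
import sqlglot
from sqlglot import exp
ddl = "CREATE TABLE t (c INT) CLUSTERED BY (c) SORTED BY (c) INTO 4 BUCKETS"
prop = sqlglot.parse_one(ddl, read="hive").find(exp.ClusteredByProperty)
print(prop)  # -> CLUSTERED BY (c) SORTED BY (c) INTO 4 BUCKETS
# ----------------------------------------------------------------------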
self._match(TokenType.INTO) 2174 buckets = self._parse_number() 2175 self._match_text_seq("BUCKETS") 2176 2177 return self.expression( 2178 exp.ClusteredByProperty, 2179 expressions=expressions, 2180 sorted_by=sorted_by, 2181 buckets=buckets, 2182 ) 2183 2184 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2185 if not self._match_text_seq("GRANTS"): 2186 self._retreat(self._index - 1) 2187 return None 2188 2189 return self.expression(exp.CopyGrantsProperty) 2190 2191 def _parse_freespace(self) -> exp.FreespaceProperty: 2192 self._match(TokenType.EQ) 2193 return self.expression( 2194 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2195 ) 2196 2197 def _parse_mergeblockratio( 2198 self, no: bool = False, default: bool = False 2199 ) -> exp.MergeBlockRatioProperty: 2200 if self._match(TokenType.EQ): 2201 return self.expression( 2202 exp.MergeBlockRatioProperty, 2203 this=self._parse_number(), 2204 percent=self._match(TokenType.PERCENT), 2205 ) 2206 2207 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2208 2209 def _parse_datablocksize( 2210 self, 2211 default: t.Optional[bool] = None, 2212 minimum: t.Optional[bool] = None, 2213 maximum: t.Optional[bool] = None, 2214 ) -> exp.DataBlocksizeProperty: 2215 self._match(TokenType.EQ) 2216 size = self._parse_number() 2217 2218 units = None 2219 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2220 units = self._prev.text 2221 2222 return self.expression( 2223 exp.DataBlocksizeProperty, 2224 size=size, 2225 units=units, 2226 default=default, 2227 minimum=minimum, 2228 maximum=maximum, 2229 ) 2230 2231 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2232 self._match(TokenType.EQ) 2233 always = self._match_text_seq("ALWAYS") 2234 manual = self._match_text_seq("MANUAL") 2235 never = self._match_text_seq("NEVER") 2236 default = self._match_text_seq("DEFAULT") 2237 2238 autotemp = None 2239 if self._match_text_seq("AUTOTEMP"): 2240 autotemp = self._parse_schema() 2241 2242 return self.expression( 2243 exp.BlockCompressionProperty, 2244 always=always, 2245 manual=manual, 2246 never=never, 2247 default=default, 2248 autotemp=autotemp, 2249 ) 2250 2251 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2252 index = self._index 2253 no = self._match_text_seq("NO") 2254 concurrent = self._match_text_seq("CONCURRENT") 2255 2256 if not self._match_text_seq("ISOLATED", "LOADING"): 2257 self._retreat(index) 2258 return None 2259 2260 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2261 return self.expression( 2262 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2263 ) 2264 2265 def _parse_locking(self) -> exp.LockingProperty: 2266 if self._match(TokenType.TABLE): 2267 kind = "TABLE" 2268 elif self._match(TokenType.VIEW): 2269 kind = "VIEW" 2270 elif self._match(TokenType.ROW): 2271 kind = "ROW" 2272 elif self._match_text_seq("DATABASE"): 2273 kind = "DATABASE" 2274 else: 2275 kind = None 2276 2277 if kind in ("DATABASE", "TABLE", "VIEW"): 2278 this = self._parse_table_parts() 2279 else: 2280 this = None 2281 2282 if self._match(TokenType.FOR): 2283 for_or_in = "FOR" 2284 elif self._match(TokenType.IN): 2285 for_or_in = "IN" 2286 else: 2287 for_or_in = None 2288 2289 if self._match_text_seq("ACCESS"): 2290 lock_type = "ACCESS" 2291 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2292 lock_type = "EXCLUSIVE" 2293 elif self._match_text_seq("SHARE"): 2294 
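# ----------------------------------------------------------------------
# [Editor's aside] _parse_locking and the surrounding *Property helpers
# mostly serve Teradata DDL, where options are listed before the column
# definitions. A sketch, assuming the Teradata dialect round-trips this form:
import sqlglot
ddl = "CREATE TABLE t, NO FALLBACK, CHECKSUM = DEFAULT, DATABLOCKSIZE = 128 BYTES (c INT)"
print(sqlglot.transpile(ddl, read="teradata", write="teradata")[0])
# ----------------------------------------------------------------------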
lock_type = "SHARE" 2295 elif self._match_text_seq("READ"): 2296 lock_type = "READ" 2297 elif self._match_text_seq("WRITE"): 2298 lock_type = "WRITE" 2299 elif self._match_text_seq("CHECKSUM"): 2300 lock_type = "CHECKSUM" 2301 else: 2302 lock_type = None 2303 2304 override = self._match_text_seq("OVERRIDE") 2305 2306 return self.expression( 2307 exp.LockingProperty, 2308 this=this, 2309 kind=kind, 2310 for_or_in=for_or_in, 2311 lock_type=lock_type, 2312 override=override, 2313 ) 2314 2315 def _parse_partition_by(self) -> t.List[exp.Expression]: 2316 if self._match(TokenType.PARTITION_BY): 2317 return self._parse_csv(self._parse_assignment) 2318 return [] 2319 2320 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2321 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2322 if self._match_text_seq("MINVALUE"): 2323 return exp.var("MINVALUE") 2324 if self._match_text_seq("MAXVALUE"): 2325 return exp.var("MAXVALUE") 2326 return self._parse_bitwise() 2327 2328 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2329 expression = None 2330 from_expressions = None 2331 to_expressions = None 2332 2333 if self._match(TokenType.IN): 2334 this = self._parse_wrapped_csv(self._parse_bitwise) 2335 elif self._match(TokenType.FROM): 2336 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2337 self._match_text_seq("TO") 2338 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2339 elif self._match_text_seq("WITH", "(", "MODULUS"): 2340 this = self._parse_number() 2341 self._match_text_seq(",", "REMAINDER") 2342 expression = self._parse_number() 2343 self._match_r_paren() 2344 else: 2345 self.raise_error("Failed to parse partition bound spec.") 2346 2347 return self.expression( 2348 exp.PartitionBoundSpec, 2349 this=this, 2350 expression=expression, 2351 from_expressions=from_expressions, 2352 to_expressions=to_expressions, 2353 ) 2354 2355 # https://www.postgresql.org/docs/current/sql-createtable.html 2356 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2357 if not self._match_text_seq("OF"): 2358 self._retreat(self._index - 1) 2359 return None 2360 2361 this = self._parse_table(schema=True) 2362 2363 if self._match(TokenType.DEFAULT): 2364 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2365 elif self._match_text_seq("FOR", "VALUES"): 2366 expression = self._parse_partition_bound_spec() 2367 else: 2368 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2369 2370 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2371 2372 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2373 self._match(TokenType.EQ) 2374 return self.expression( 2375 exp.PartitionedByProperty, 2376 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2377 ) 2378 2379 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2380 if self._match_text_seq("AND", "STATISTICS"): 2381 statistics = True 2382 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2383 statistics = False 2384 else: 2385 statistics = None 2386 2387 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2388 2389 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2390 if self._match_text_seq("SQL"): 2391 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2392 return None 2393 2394 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2395 if self._match_text_seq("SQL", 
"DATA"): 2396 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2397 return None 2398 2399 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2400 if self._match_text_seq("PRIMARY", "INDEX"): 2401 return exp.NoPrimaryIndexProperty() 2402 if self._match_text_seq("SQL"): 2403 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2404 return None 2405 2406 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2407 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2408 return exp.OnCommitProperty() 2409 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2410 return exp.OnCommitProperty(delete=True) 2411 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2412 2413 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2414 if self._match_text_seq("SQL", "DATA"): 2415 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2416 return None 2417 2418 def _parse_distkey(self) -> exp.DistKeyProperty: 2419 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2420 2421 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2422 table = self._parse_table(schema=True) 2423 2424 options = [] 2425 while self._match_texts(("INCLUDING", "EXCLUDING")): 2426 this = self._prev.text.upper() 2427 2428 id_var = self._parse_id_var() 2429 if not id_var: 2430 return None 2431 2432 options.append( 2433 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2434 ) 2435 2436 return self.expression(exp.LikeProperty, this=table, expressions=options) 2437 2438 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2439 return self.expression( 2440 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2441 ) 2442 2443 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2444 self._match(TokenType.EQ) 2445 return self.expression( 2446 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2447 ) 2448 2449 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2450 self._match_text_seq("WITH", "CONNECTION") 2451 return self.expression( 2452 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2453 ) 2454 2455 def _parse_returns(self) -> exp.ReturnsProperty: 2456 value: t.Optional[exp.Expression] 2457 null = None 2458 is_table = self._match(TokenType.TABLE) 2459 2460 if is_table: 2461 if self._match(TokenType.LT): 2462 value = self.expression( 2463 exp.Schema, 2464 this="TABLE", 2465 expressions=self._parse_csv(self._parse_struct_types), 2466 ) 2467 if not self._match(TokenType.GT): 2468 self.raise_error("Expecting >") 2469 else: 2470 value = self._parse_schema(exp.var("TABLE")) 2471 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2472 null = True 2473 value = None 2474 else: 2475 value = self._parse_types() 2476 2477 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2478 2479 def _parse_describe(self) -> exp.Describe: 2480 kind = self._match_set(self.CREATABLES) and self._prev.text 2481 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2482 if self._match(TokenType.DOT): 2483 style = None 2484 self._retreat(self._index - 2) 2485 this = self._parse_table(schema=True) 2486 properties = self._parse_properties() 2487 expressions = properties.expressions if properties else None 2488 return self.expression( 2489 
exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2490 ) 2491 2492 def _parse_insert(self) -> exp.Insert: 2493 comments = ensure_list(self._prev_comments) 2494 hint = self._parse_hint() 2495 overwrite = self._match(TokenType.OVERWRITE) 2496 ignore = self._match(TokenType.IGNORE) 2497 local = self._match_text_seq("LOCAL") 2498 alternative = None 2499 is_function = None 2500 2501 if self._match_text_seq("DIRECTORY"): 2502 this: t.Optional[exp.Expression] = self.expression( 2503 exp.Directory, 2504 this=self._parse_var_or_string(), 2505 local=local, 2506 row_format=self._parse_row_format(match_row=True), 2507 ) 2508 else: 2509 if self._match(TokenType.OR): 2510 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2511 2512 self._match(TokenType.INTO) 2513 comments += ensure_list(self._prev_comments) 2514 self._match(TokenType.TABLE) 2515 is_function = self._match(TokenType.FUNCTION) 2516 2517 this = ( 2518 self._parse_table(schema=True, parse_partition=True) 2519 if not is_function 2520 else self._parse_function() 2521 ) 2522 2523 returning = self._parse_returning() 2524 2525 return self.expression( 2526 exp.Insert, 2527 comments=comments, 2528 hint=hint, 2529 is_function=is_function, 2530 this=this, 2531 stored=self._match_text_seq("STORED") and self._parse_stored(), 2532 by_name=self._match_text_seq("BY", "NAME"), 2533 exists=self._parse_exists(), 2534 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2535 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2536 conflict=self._parse_on_conflict(), 2537 returning=returning or self._parse_returning(), 2538 overwrite=overwrite, 2539 alternative=alternative, 2540 ignore=ignore, 2541 ) 2542 2543 def _parse_kill(self) -> exp.Kill: 2544 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2545 2546 return self.expression( 2547 exp.Kill, 2548 this=self._parse_primary(), 2549 kind=kind, 2550 ) 2551 2552 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2553 conflict = self._match_text_seq("ON", "CONFLICT") 2554 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2555 2556 if not conflict and not duplicate: 2557 return None 2558 2559 conflict_keys = None 2560 constraint = None 2561 2562 if conflict: 2563 if self._match_text_seq("ON", "CONSTRAINT"): 2564 constraint = self._parse_id_var() 2565 elif self._match(TokenType.L_PAREN): 2566 conflict_keys = self._parse_csv(self._parse_id_var) 2567 self._match_r_paren() 2568 2569 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2570 if self._prev.token_type == TokenType.UPDATE: 2571 self._match(TokenType.SET) 2572 expressions = self._parse_csv(self._parse_equality) 2573 else: 2574 expressions = None 2575 2576 return self.expression( 2577 exp.OnConflict, 2578 duplicate=duplicate, 2579 expressions=expressions, 2580 action=action, 2581 conflict_keys=conflict_keys, 2582 constraint=constraint, 2583 ) 2584 2585 def _parse_returning(self) -> t.Optional[exp.Returning]: 2586 if not self._match(TokenType.RETURNING): 2587 return None 2588 return self.expression( 2589 exp.Returning, 2590 expressions=self._parse_csv(self._parse_expression), 2591 into=self._match(TokenType.INTO) and self._parse_table_part(), 2592 ) 2593 2594 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2595 if not self._match(TokenType.FORMAT): 2596 return None 2597 return self._parse_row_format() 2598 2599 def 
_parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2600 index = self._index 2601 with_ = with_ or self._match_text_seq("WITH") 2602 2603 if not self._match(TokenType.SERDE_PROPERTIES): 2604 self._retreat(index) 2605 return None 2606 return self.expression( 2607 exp.SerdeProperties, 2608 **{ # type: ignore 2609 "expressions": self._parse_wrapped_properties(), 2610 "with": with_, 2611 }, 2612 ) 2613 2614 def _parse_row_format( 2615 self, match_row: bool = False 2616 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2617 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2618 return None 2619 2620 if self._match_text_seq("SERDE"): 2621 this = self._parse_string() 2622 2623 serde_properties = self._parse_serde_properties() 2624 2625 return self.expression( 2626 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2627 ) 2628 2629 self._match_text_seq("DELIMITED") 2630 2631 kwargs = {} 2632 2633 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2634 kwargs["fields"] = self._parse_string() 2635 if self._match_text_seq("ESCAPED", "BY"): 2636 kwargs["escaped"] = self._parse_string() 2637 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2638 kwargs["collection_items"] = self._parse_string() 2639 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2640 kwargs["map_keys"] = self._parse_string() 2641 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2642 kwargs["lines"] = self._parse_string() 2643 if self._match_text_seq("NULL", "DEFINED", "AS"): 2644 kwargs["null"] = self._parse_string() 2645 2646 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2647 2648 def _parse_load(self) -> exp.LoadData | exp.Command: 2649 if self._match_text_seq("DATA"): 2650 local = self._match_text_seq("LOCAL") 2651 self._match_text_seq("INPATH") 2652 inpath = self._parse_string() 2653 overwrite = self._match(TokenType.OVERWRITE) 2654 self._match_pair(TokenType.INTO, TokenType.TABLE) 2655 2656 return self.expression( 2657 exp.LoadData, 2658 this=self._parse_table(schema=True), 2659 local=local, 2660 overwrite=overwrite, 2661 inpath=inpath, 2662 partition=self._parse_partition(), 2663 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2664 serde=self._match_text_seq("SERDE") and self._parse_string(), 2665 ) 2666 return self._parse_as_command(self._prev) 2667 2668 def _parse_delete(self) -> exp.Delete: 2669 # This handles MySQL's "Multiple-Table Syntax" 2670 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2671 tables = None 2672 comments = self._prev_comments 2673 if not self._match(TokenType.FROM, advance=False): 2674 tables = self._parse_csv(self._parse_table) or None 2675 2676 returning = self._parse_returning() 2677 2678 return self.expression( 2679 exp.Delete, 2680 comments=comments, 2681 tables=tables, 2682 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2683 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2684 where=self._parse_where(), 2685 returning=returning or self._parse_returning(), 2686 limit=self._parse_limit(), 2687 ) 2688 2689 def _parse_update(self) -> exp.Update: 2690 comments = self._prev_comments 2691 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2692 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2693 returning = self._parse_returning() 2694 return self.expression( 2695 exp.Update, 2696 
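# ----------------------------------------------------------------------
# [Editor's aside] Sketch of the multiple-table DELETE branch in
# _parse_delete above (the `tables` arg), assuming the MySQL dialect:
import sqlglot
stmt = sqlglot.parse_one("DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
print([t.name for t in stmt.args["tables"]])  # -> ['t1']
# ----------------------------------------------------------------------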
comments=comments, 2697 **{ # type: ignore 2698 "this": this, 2699 "expressions": expressions, 2700 "from": self._parse_from(joins=True), 2701 "where": self._parse_where(), 2702 "returning": returning or self._parse_returning(), 2703 "order": self._parse_order(), 2704 "limit": self._parse_limit(), 2705 }, 2706 ) 2707 2708 def _parse_uncache(self) -> exp.Uncache: 2709 if not self._match(TokenType.TABLE): 2710 self.raise_error("Expecting TABLE after UNCACHE") 2711 2712 return self.expression( 2713 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2714 ) 2715 2716 def _parse_cache(self) -> exp.Cache: 2717 lazy = self._match_text_seq("LAZY") 2718 self._match(TokenType.TABLE) 2719 table = self._parse_table(schema=True) 2720 2721 options = [] 2722 if self._match_text_seq("OPTIONS"): 2723 self._match_l_paren() 2724 k = self._parse_string() 2725 self._match(TokenType.EQ) 2726 v = self._parse_string() 2727 options = [k, v] 2728 self._match_r_paren() 2729 2730 self._match(TokenType.ALIAS) 2731 return self.expression( 2732 exp.Cache, 2733 this=table, 2734 lazy=lazy, 2735 options=options, 2736 expression=self._parse_select(nested=True), 2737 ) 2738 2739 def _parse_partition(self) -> t.Optional[exp.Partition]: 2740 if not self._match(TokenType.PARTITION): 2741 return None 2742 2743 return self.expression( 2744 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2745 ) 2746 2747 def _parse_value(self) -> t.Optional[exp.Tuple]: 2748 if self._match(TokenType.L_PAREN): 2749 expressions = self._parse_csv(self._parse_expression) 2750 self._match_r_paren() 2751 return self.expression(exp.Tuple, expressions=expressions) 2752 2753 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2754 expression = self._parse_expression() 2755 if expression: 2756 return self.expression(exp.Tuple, expressions=[expression]) 2757 return None 2758 2759 def _parse_projections(self) -> t.List[exp.Expression]: 2760 return self._parse_expressions() 2761 2762 def _parse_select( 2763 self, 2764 nested: bool = False, 2765 table: bool = False, 2766 parse_subquery_alias: bool = True, 2767 parse_set_operation: bool = True, 2768 ) -> t.Optional[exp.Expression]: 2769 cte = self._parse_with() 2770 2771 if cte: 2772 this = self._parse_statement() 2773 2774 if not this: 2775 self.raise_error("Failed to parse any statement following CTE") 2776 return cte 2777 2778 if "with" in this.arg_types: 2779 this.set("with", cte) 2780 else: 2781 self.raise_error(f"{this.key} does not support CTE") 2782 this = cte 2783 2784 return this 2785 2786 # duckdb supports leading with FROM x 2787 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2788 2789 if self._match(TokenType.SELECT): 2790 comments = self._prev_comments 2791 2792 hint = self._parse_hint() 2793 2794 if self._next and not self._next.token_type == TokenType.DOT: 2795 all_ = self._match(TokenType.ALL) 2796 distinct = self._match_set(self.DISTINCT_TOKENS) 2797 else: 2798 all_, distinct = None, None 2799 2800 kind = ( 2801 self._match(TokenType.ALIAS) 2802 and self._match_texts(("STRUCT", "VALUE")) 2803 and self._prev.text.upper() 2804 ) 2805 2806 if distinct: 2807 distinct = self.expression( 2808 exp.Distinct, 2809 on=self._parse_value() if self._match(TokenType.ON) else None, 2810 ) 2811 2812 if all_ and distinct: 2813 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2814 2815 limit = self._parse_limit(top=True) 2816 projections = self._parse_projections() 2817 2818 
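# ----------------------------------------------------------------------
# [Editor's aside] Sketch of the leading-FROM support captured by the
# `from_` branch above, assuming the DuckDB dialect: a bare FROM query
# expands to SELECT *.
import sqlglot
print(sqlglot.parse_one("FROM t", read="duckdb").sql())  # -> SELECT * FROM t
# ----------------------------------------------------------------------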
this = self.expression( 2819 exp.Select, 2820 kind=kind, 2821 hint=hint, 2822 distinct=distinct, 2823 expressions=projections, 2824 limit=limit, 2825 ) 2826 this.comments = comments 2827 2828 into = self._parse_into() 2829 if into: 2830 this.set("into", into) 2831 2832 if not from_: 2833 from_ = self._parse_from() 2834 2835 if from_: 2836 this.set("from", from_) 2837 2838 this = self._parse_query_modifiers(this) 2839 elif (table or nested) and self._match(TokenType.L_PAREN): 2840 if self._match(TokenType.PIVOT): 2841 this = self._parse_simplified_pivot() 2842 elif self._match(TokenType.FROM): 2843 this = exp.select("*").from_( 2844 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2845 ) 2846 else: 2847 this = ( 2848 self._parse_table() 2849 if table 2850 else self._parse_select(nested=True, parse_set_operation=False) 2851 ) 2852 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2853 2854 self._match_r_paren() 2855 2856 # We return early here so that the UNION isn't attached to the subquery by the 2857 # following call to _parse_set_operations, but instead becomes the parent node 2858 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2859 elif self._match(TokenType.VALUES, advance=False): 2860 this = self._parse_derived_table_values() 2861 elif from_: 2862 this = exp.select("*").from_(from_.this, copy=False) 2863 elif self._match(TokenType.SUMMARIZE): 2864 table = self._match(TokenType.TABLE) 2865 this = self._parse_select() or self._parse_string() or self._parse_table() 2866 return self.expression(exp.Summarize, this=this, table=table) 2867 elif self._match(TokenType.DESCRIBE): 2868 this = self._parse_describe() 2869 elif self._match_text_seq("STREAM"): 2870 this = self.expression(exp.Stream, this=self._parse_function()) 2871 else: 2872 this = None 2873 2874 return self._parse_set_operations(this) if parse_set_operation else this 2875 2876 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2877 if not skip_with_token and not self._match(TokenType.WITH): 2878 return None 2879 2880 comments = self._prev_comments 2881 recursive = self._match(TokenType.RECURSIVE) 2882 2883 expressions = [] 2884 while True: 2885 expressions.append(self._parse_cte()) 2886 2887 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2888 break 2889 else: 2890 self._match(TokenType.WITH) 2891 2892 return self.expression( 2893 exp.With, comments=comments, expressions=expressions, recursive=recursive 2894 ) 2895 2896 def _parse_cte(self) -> exp.CTE: 2897 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2898 if not alias or not alias.this: 2899 self.raise_error("Expected CTE to have alias") 2900 2901 self._match(TokenType.ALIAS) 2902 comments = self._prev_comments 2903 2904 if self._match_text_seq("NOT", "MATERIALIZED"): 2905 materialized = False 2906 elif self._match_text_seq("MATERIALIZED"): 2907 materialized = True 2908 else: 2909 materialized = None 2910 2911 return self.expression( 2912 exp.CTE, 2913 this=self._parse_wrapped(self._parse_statement), 2914 alias=alias, 2915 materialized=materialized, 2916 comments=comments, 2917 ) 2918 2919 def _parse_table_alias( 2920 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2921 ) -> t.Optional[exp.TableAlias]: 2922 any_token = self._match(TokenType.ALIAS) 2923 alias = ( 2924 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2925 or self._parse_string_as_identifier() 2926 ) 2927 2928 index = self._index 2929 if 
self._match(TokenType.L_PAREN): 2930 columns = self._parse_csv(self._parse_function_parameter) 2931 self._match_r_paren() if columns else self._retreat(index) 2932 else: 2933 columns = None 2934 2935 if not alias and not columns: 2936 return None 2937 2938 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2939 2940 # We bubble up comments from the Identifier to the TableAlias 2941 if isinstance(alias, exp.Identifier): 2942 table_alias.add_comments(alias.pop_comments()) 2943 2944 return table_alias 2945 2946 def _parse_subquery( 2947 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2948 ) -> t.Optional[exp.Subquery]: 2949 if not this: 2950 return None 2951 2952 return self.expression( 2953 exp.Subquery, 2954 this=this, 2955 pivots=self._parse_pivots(), 2956 alias=self._parse_table_alias() if parse_alias else None, 2957 ) 2958 2959 def _implicit_unnests_to_explicit(self, this: E) -> E: 2960 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2961 2962 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2963 for i, join in enumerate(this.args.get("joins") or []): 2964 table = join.this 2965 normalized_table = table.copy() 2966 normalized_table.meta["maybe_column"] = True 2967 normalized_table = _norm(normalized_table, dialect=self.dialect) 2968 2969 if isinstance(table, exp.Table) and not join.args.get("on"): 2970 if normalized_table.parts[0].name in refs: 2971 table_as_column = table.to_column() 2972 unnest = exp.Unnest(expressions=[table_as_column]) 2973 2974 # Table.to_column creates a parent Alias node that we want to convert to 2975 # a TableAlias and attach to the Unnest, so it matches the parser's output 2976 if isinstance(table.args.get("alias"), exp.TableAlias): 2977 table_as_column.replace(table_as_column.this) 2978 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2979 2980 table.replace(unnest) 2981 2982 refs.add(normalized_table.alias_or_name) 2983 2984 return this 2985 2986 def _parse_query_modifiers( 2987 self, this: t.Optional[exp.Expression] 2988 ) -> t.Optional[exp.Expression]: 2989 if isinstance(this, (exp.Query, exp.Table)): 2990 for join in self._parse_joins(): 2991 this.append("joins", join) 2992 for lateral in iter(self._parse_lateral, None): 2993 this.append("laterals", lateral) 2994 2995 while True: 2996 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2997 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2998 key, expression = parser(self) 2999 3000 if expression: 3001 this.set(key, expression) 3002 if key == "limit": 3003 offset = expression.args.pop("offset", None) 3004 3005 if offset: 3006 offset = exp.Offset(expression=offset) 3007 this.set("offset", offset) 3008 3009 limit_by_expressions = expression.expressions 3010 expression.set("expressions", None) 3011 offset.set("expressions", limit_by_expressions) 3012 continue 3013 break 3014 3015 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3016 this = self._implicit_unnests_to_explicit(this) 3017 3018 return this 3019 3020 def _parse_hint(self) -> t.Optional[exp.Hint]: 3021 if self._match(TokenType.HINT): 3022 hints = [] 3023 for hint in iter( 3024 lambda: self._parse_csv( 3025 lambda: self._parse_function() or self._parse_var(upper=True) 3026 ), 3027 [], 3028 ): 3029 hints.extend(hint) 3030 3031 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3032 self.raise_error("Expected */ after HINT") 3033 3034 return self.expression(exp.Hint, 
expressions=hints) 3035 3036 return None 3037 3038 def _parse_into(self) -> t.Optional[exp.Into]: 3039 if not self._match(TokenType.INTO): 3040 return None 3041 3042 temp = self._match(TokenType.TEMPORARY) 3043 unlogged = self._match_text_seq("UNLOGGED") 3044 self._match(TokenType.TABLE) 3045 3046 return self.expression( 3047 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3048 ) 3049 3050 def _parse_from( 3051 self, joins: bool = False, skip_from_token: bool = False 3052 ) -> t.Optional[exp.From]: 3053 if not skip_from_token and not self._match(TokenType.FROM): 3054 return None 3055 3056 return self.expression( 3057 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3058 ) 3059 3060 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3061 return self.expression( 3062 exp.MatchRecognizeMeasure, 3063 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3064 this=self._parse_expression(), 3065 ) 3066 3067 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3068 if not self._match(TokenType.MATCH_RECOGNIZE): 3069 return None 3070 3071 self._match_l_paren() 3072 3073 partition = self._parse_partition_by() 3074 order = self._parse_order() 3075 3076 measures = ( 3077 self._parse_csv(self._parse_match_recognize_measure) 3078 if self._match_text_seq("MEASURES") 3079 else None 3080 ) 3081 3082 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3083 rows = exp.var("ONE ROW PER MATCH") 3084 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3085 text = "ALL ROWS PER MATCH" 3086 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3087 text += " SHOW EMPTY MATCHES" 3088 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3089 text += " OMIT EMPTY MATCHES" 3090 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3091 text += " WITH UNMATCHED ROWS" 3092 rows = exp.var(text) 3093 else: 3094 rows = None 3095 3096 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3097 text = "AFTER MATCH SKIP" 3098 if self._match_text_seq("PAST", "LAST", "ROW"): 3099 text += " PAST LAST ROW" 3100 elif self._match_text_seq("TO", "NEXT", "ROW"): 3101 text += " TO NEXT ROW" 3102 elif self._match_text_seq("TO", "FIRST"): 3103 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3104 elif self._match_text_seq("TO", "LAST"): 3105 text += f" TO LAST {self._advance_any().text}" # type: ignore 3106 after = exp.var(text) 3107 else: 3108 after = None 3109 3110 if self._match_text_seq("PATTERN"): 3111 self._match_l_paren() 3112 3113 if not self._curr: 3114 self.raise_error("Expecting )", self._curr) 3115 3116 paren = 1 3117 start = self._curr 3118 3119 while self._curr and paren > 0: 3120 if self._curr.token_type == TokenType.L_PAREN: 3121 paren += 1 3122 if self._curr.token_type == TokenType.R_PAREN: 3123 paren -= 1 3124 3125 end = self._prev 3126 self._advance() 3127 3128 if paren > 0: 3129 self.raise_error("Expecting )", self._curr) 3130 3131 pattern = exp.var(self._find_sql(start, end)) 3132 else: 3133 pattern = None 3134 3135 define = ( 3136 self._parse_csv(self._parse_name_as_expression) 3137 if self._match_text_seq("DEFINE") 3138 else None 3139 ) 3140 3141 self._match_r_paren() 3142 3143 return self.expression( 3144 exp.MatchRecognize, 3145 partition_by=partition, 3146 order=order, 3147 measures=measures, 3148 rows=rows, 3149 after=after, 3150 pattern=pattern, 3151 define=define, 3152 alias=self._parse_table_alias(), 3153 ) 3154 3155 def _parse_lateral(self) -> 
t.Optional[exp.Lateral]: 3156 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3157 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3158 cross_apply = False 3159 3160 if cross_apply is not None: 3161 this = self._parse_select(table=True) 3162 view = None 3163 outer = None 3164 elif self._match(TokenType.LATERAL): 3165 this = self._parse_select(table=True) 3166 view = self._match(TokenType.VIEW) 3167 outer = self._match(TokenType.OUTER) 3168 else: 3169 return None 3170 3171 if not this: 3172 this = ( 3173 self._parse_unnest() 3174 or self._parse_function() 3175 or self._parse_id_var(any_token=False) 3176 ) 3177 3178 while self._match(TokenType.DOT): 3179 this = exp.Dot( 3180 this=this, 3181 expression=self._parse_function() or self._parse_id_var(any_token=False), 3182 ) 3183 3184 if view: 3185 table = self._parse_id_var(any_token=False) 3186 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3187 table_alias: t.Optional[exp.TableAlias] = self.expression( 3188 exp.TableAlias, this=table, columns=columns 3189 ) 3190 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3191 # We move the alias from the lateral's child node to the lateral itself 3192 table_alias = this.args["alias"].pop() 3193 else: 3194 table_alias = self._parse_table_alias() 3195 3196 return self.expression( 3197 exp.Lateral, 3198 this=this, 3199 view=view, 3200 outer=outer, 3201 alias=table_alias, 3202 cross_apply=cross_apply, 3203 ) 3204 3205 def _parse_join_parts( 3206 self, 3207 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3208 return ( 3209 self._match_set(self.JOIN_METHODS) and self._prev, 3210 self._match_set(self.JOIN_SIDES) and self._prev, 3211 self._match_set(self.JOIN_KINDS) and self._prev, 3212 ) 3213 3214 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3215 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3216 this = self._parse_column() 3217 if isinstance(this, exp.Column): 3218 return this.this 3219 return this 3220 3221 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3222 3223 def _parse_join( 3224 self, skip_join_token: bool = False, parse_bracket: bool = False 3225 ) -> t.Optional[exp.Join]: 3226 if self._match(TokenType.COMMA): 3227 return self.expression(exp.Join, this=self._parse_table()) 3228 3229 index = self._index 3230 method, side, kind = self._parse_join_parts() 3231 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3232 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3233 3234 if not skip_join_token and not join: 3235 self._retreat(index) 3236 kind = None 3237 method = None 3238 side = None 3239 3240 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3241 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3242 3243 if not skip_join_token and not join and not outer_apply and not cross_apply: 3244 return None 3245 3246 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3247 3248 if method: 3249 kwargs["method"] = method.text 3250 if side: 3251 kwargs["side"] = side.text 3252 if kind: 3253 kwargs["kind"] = kind.text 3254 if hint: 3255 kwargs["hint"] = hint 3256 3257 if self._match(TokenType.MATCH_CONDITION): 3258 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3259 3260 if self._match(TokenType.ON): 3261 kwargs["on"] = self._parse_assignment() 3262 elif 
self._match(TokenType.USING): 3263 kwargs["using"] = self._parse_using_identifiers() 3264 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3265 kind and kind.token_type == TokenType.CROSS 3266 ): 3267 index = self._index 3268 joins: t.Optional[list] = list(self._parse_joins()) 3269 3270 if joins and self._match(TokenType.ON): 3271 kwargs["on"] = self._parse_assignment() 3272 elif joins and self._match(TokenType.USING): 3273 kwargs["using"] = self._parse_using_identifiers() 3274 else: 3275 joins = None 3276 self._retreat(index) 3277 3278 kwargs["this"].set("joins", joins if joins else None) 3279 3280 comments = [c for token in (method, side, kind) if token for c in token.comments] 3281 return self.expression(exp.Join, comments=comments, **kwargs) 3282 3283 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3284 this = self._parse_assignment() 3285 3286 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3287 return this 3288 3289 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3290 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3291 3292 return this 3293 3294 def _parse_index_params(self) -> exp.IndexParameters: 3295 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3296 3297 if self._match(TokenType.L_PAREN, advance=False): 3298 columns = self._parse_wrapped_csv(self._parse_with_operator) 3299 else: 3300 columns = None 3301 3302 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3303 partition_by = self._parse_partition_by() 3304 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3305 tablespace = ( 3306 self._parse_var(any_token=True) 3307 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3308 else None 3309 ) 3310 where = self._parse_where() 3311 3312 on = self._parse_field() if self._match(TokenType.ON) else None 3313 3314 return self.expression( 3315 exp.IndexParameters, 3316 using=using, 3317 columns=columns, 3318 include=include, 3319 partition_by=partition_by, 3320 where=where, 3321 with_storage=with_storage, 3322 tablespace=tablespace, 3323 on=on, 3324 ) 3325 3326 def _parse_index( 3327 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3328 ) -> t.Optional[exp.Index]: 3329 if index or anonymous: 3330 unique = None 3331 primary = None 3332 amp = None 3333 3334 self._match(TokenType.ON) 3335 self._match(TokenType.TABLE) # hive 3336 table = self._parse_table_parts(schema=True) 3337 else: 3338 unique = self._match(TokenType.UNIQUE) 3339 primary = self._match_text_seq("PRIMARY") 3340 amp = self._match_text_seq("AMP") 3341 3342 if not self._match(TokenType.INDEX): 3343 return None 3344 3345 index = self._parse_id_var() 3346 table = None 3347 3348 params = self._parse_index_params() 3349 3350 return self.expression( 3351 exp.Index, 3352 this=index, 3353 table=table, 3354 unique=unique, 3355 primary=primary, 3356 amp=amp, 3357 params=params, 3358 ) 3359 3360 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3361 hints: t.List[exp.Expression] = [] 3362 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3363 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3364 hints.append( 3365 self.expression( 3366 exp.WithTableHint, 3367 expressions=self._parse_csv( 3368 lambda: self._parse_function() or self._parse_var(any_token=True) 3369 ), 3370 ) 3371 ) 3372 self._match_r_paren() 3373 else: 3374 # 
https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3375 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3376 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3377 3378 self._match_set((TokenType.INDEX, TokenType.KEY)) 3379 if self._match(TokenType.FOR): 3380 hint.set("target", self._advance_any() and self._prev.text.upper()) 3381 3382 hint.set("expressions", self._parse_wrapped_id_vars()) 3383 hints.append(hint) 3384 3385 return hints or None 3386 3387 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3388 return ( 3389 (not schema and self._parse_function(optional_parens=False)) 3390 or self._parse_id_var(any_token=False) 3391 or self._parse_string_as_identifier() 3392 or self._parse_placeholder() 3393 ) 3394 3395 def _parse_table_parts( 3396 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3397 ) -> exp.Table: 3398 catalog = None 3399 db = None 3400 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3401 3402 while self._match(TokenType.DOT): 3403 if catalog: 3404 # This allows nesting the table in arbitrarily many dot expressions if needed 3405 table = self.expression( 3406 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3407 ) 3408 else: 3409 catalog = db 3410 db = table 3411 # "" used for tsql FROM a..b case 3412 table = self._parse_table_part(schema=schema) or "" 3413 3414 if ( 3415 wildcard 3416 and self._is_connected() 3417 and (isinstance(table, exp.Identifier) or not table) 3418 and self._match(TokenType.STAR) 3419 ): 3420 if isinstance(table, exp.Identifier): 3421 table.args["this"] += "*" 3422 else: 3423 table = exp.Identifier(this="*") 3424 3425 # We bubble up comments from the Identifier to the Table 3426 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3427 3428 if is_db_reference: 3429 catalog = db 3430 db = table 3431 table = None 3432 3433 if not table and not is_db_reference: 3434 self.raise_error(f"Expected table name but got {self._curr}") 3435 if not db and is_db_reference: 3436 self.raise_error(f"Expected database name but got {self._curr}") 3437 3438 table = self.expression( 3439 exp.Table, 3440 comments=comments, 3441 this=table, 3442 db=db, 3443 catalog=catalog, 3444 ) 3445 3446 changes = self._parse_changes() 3447 if changes: 3448 table.set("changes", changes) 3449 3450 at_before = self._parse_historical_data() 3451 if at_before: 3452 table.set("when", at_before) 3453 3454 pivots = self._parse_pivots() 3455 if pivots: 3456 table.set("pivots", pivots) 3457 3458 return table 3459 3460 def _parse_table( 3461 self, 3462 schema: bool = False, 3463 joins: bool = False, 3464 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3465 parse_bracket: bool = False, 3466 is_db_reference: bool = False, 3467 parse_partition: bool = False, 3468 ) -> t.Optional[exp.Expression]: 3469 lateral = self._parse_lateral() 3470 if lateral: 3471 return lateral 3472 3473 unnest = self._parse_unnest() 3474 if unnest: 3475 return unnest 3476 3477 values = self._parse_derived_table_values() 3478 if values: 3479 return values 3480 3481 subquery = self._parse_select(table=True) 3482 if subquery: 3483 if not subquery.args.get("pivots"): 3484 subquery.set("pivots", self._parse_pivots()) 3485 return subquery 3486 3487 bracket = parse_bracket and self._parse_bracket(None) 3488 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3489 3490 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 
3491 self._parse_table 3492 ) 3493 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3494 3495 only = self._match(TokenType.ONLY) 3496 3497 this = t.cast( 3498 exp.Expression, 3499 bracket 3500 or rows_from 3501 or self._parse_bracket( 3502 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3503 ), 3504 ) 3505 3506 if only: 3507 this.set("only", only) 3508 3509 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3510 self._match_text_seq("*") 3511 3512 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3513 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3514 this.set("partition", self._parse_partition()) 3515 3516 if schema: 3517 return self._parse_schema(this=this) 3518 3519 version = self._parse_version() 3520 3521 if version: 3522 this.set("version", version) 3523 3524 if self.dialect.ALIAS_POST_TABLESAMPLE: 3525 table_sample = self._parse_table_sample() 3526 3527 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3528 if alias: 3529 this.set("alias", alias) 3530 3531 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3532 return self.expression( 3533 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3534 ) 3535 3536 this.set("hints", self._parse_table_hints()) 3537 3538 if not this.args.get("pivots"): 3539 this.set("pivots", self._parse_pivots()) 3540 3541 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3542 table_sample = self._parse_table_sample() 3543 3544 if table_sample: 3545 table_sample.set("this", this) 3546 this = table_sample 3547 3548 if joins: 3549 for join in self._parse_joins(): 3550 this.append("joins", join) 3551 3552 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3553 this.set("ordinality", True) 3554 this.set("alias", self._parse_table_alias()) 3555 3556 return this 3557 3558 def _parse_version(self) -> t.Optional[exp.Version]: 3559 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3560 this = "TIMESTAMP" 3561 elif self._match(TokenType.VERSION_SNAPSHOT): 3562 this = "VERSION" 3563 else: 3564 return None 3565 3566 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3567 kind = self._prev.text.upper() 3568 start = self._parse_bitwise() 3569 self._match_texts(("TO", "AND")) 3570 end = self._parse_bitwise() 3571 expression: t.Optional[exp.Expression] = self.expression( 3572 exp.Tuple, expressions=[start, end] 3573 ) 3574 elif self._match_text_seq("CONTAINED", "IN"): 3575 kind = "CONTAINED IN" 3576 expression = self.expression( 3577 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3578 ) 3579 elif self._match(TokenType.ALL): 3580 kind = "ALL" 3581 expression = None 3582 else: 3583 self._match_text_seq("AS", "OF") 3584 kind = "AS OF" 3585 expression = self._parse_type() 3586 3587 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3588 3589 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3590 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3591 index = self._index 3592 historical_data = None 3593 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3594 this = self._prev.text.upper() 3595 kind = ( 3596 self._match(TokenType.L_PAREN) 3597 and self._match_texts(self.HISTORICAL_DATA_KIND) 3598 and self._prev.text.upper() 3599 ) 3600 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3601 3602 if expression: 3603 self._match_r_paren() 3604 historical_data = 
self.expression( 3605 exp.HistoricalData, this=this, kind=kind, expression=expression 3606 ) 3607 else: 3608 self._retreat(index) 3609 3610 return historical_data 3611 3612 def _parse_changes(self) -> t.Optional[exp.Changes]: 3613 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3614 return None 3615 3616 information = self._parse_var(any_token=True) 3617 self._match_r_paren() 3618 3619 return self.expression( 3620 exp.Changes, 3621 information=information, 3622 at_before=self._parse_historical_data(), 3623 end=self._parse_historical_data(), 3624 ) 3625 3626 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3627 if not self._match(TokenType.UNNEST): 3628 return None 3629 3630 expressions = self._parse_wrapped_csv(self._parse_equality) 3631 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3632 3633 alias = self._parse_table_alias() if with_alias else None 3634 3635 if alias: 3636 if self.dialect.UNNEST_COLUMN_ONLY: 3637 if alias.args.get("columns"): 3638 self.raise_error("Unexpected extra column alias in unnest.") 3639 3640 alias.set("columns", [alias.this]) 3641 alias.set("this", None) 3642 3643 columns = alias.args.get("columns") or [] 3644 if offset and len(expressions) < len(columns): 3645 offset = columns.pop() 3646 3647 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3648 self._match(TokenType.ALIAS) 3649 offset = self._parse_id_var( 3650 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3651 ) or exp.to_identifier("offset") 3652 3653 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3654 3655 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3656 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3657 if not is_derived and not self._match_text_seq("VALUES"): 3658 return None 3659 3660 expressions = self._parse_csv(self._parse_value) 3661 alias = self._parse_table_alias() 3662 3663 if is_derived: 3664 self._match_r_paren() 3665 3666 return self.expression( 3667 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3668 ) 3669 3670 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3671 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3672 as_modifier and self._match_text_seq("USING", "SAMPLE") 3673 ): 3674 return None 3675 3676 bucket_numerator = None 3677 bucket_denominator = None 3678 bucket_field = None 3679 percent = None 3680 size = None 3681 seed = None 3682 3683 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3684 matched_l_paren = self._match(TokenType.L_PAREN) 3685 3686 if self.TABLESAMPLE_CSV: 3687 num = None 3688 expressions = self._parse_csv(self._parse_primary) 3689 else: 3690 expressions = None 3691 num = ( 3692 self._parse_factor() 3693 if self._match(TokenType.NUMBER, advance=False) 3694 else self._parse_primary() or self._parse_placeholder() 3695 ) 3696 3697 if self._match_text_seq("BUCKET"): 3698 bucket_numerator = self._parse_number() 3699 self._match_text_seq("OUT", "OF") 3700 bucket_denominator = self._parse_number() 3701 self._match(TokenType.ON) 3702 bucket_field = self._parse_field() 3703 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3704 percent = num 3705 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3706 size = num 3707 else: 3708 percent = num 3709 3710 if matched_l_paren: 3711 self._match_r_paren() 3712 3713 if self._match(TokenType.L_PAREN): 3714 method =
self._parse_var(upper=True) 3715 seed = self._match(TokenType.COMMA) and self._parse_number() 3716 self._match_r_paren() 3717 elif self._match_texts(("SEED", "REPEATABLE")): 3718 seed = self._parse_wrapped(self._parse_number) 3719 3720 if not method and self.DEFAULT_SAMPLING_METHOD: 3721 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3722 3723 return self.expression( 3724 exp.TableSample, 3725 expressions=expressions, 3726 method=method, 3727 bucket_numerator=bucket_numerator, 3728 bucket_denominator=bucket_denominator, 3729 bucket_field=bucket_field, 3730 percent=percent, 3731 size=size, 3732 seed=seed, 3733 ) 3734 3735 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3736 return list(iter(self._parse_pivot, None)) or None 3737 3738 def _parse_joins(self) -> t.Iterator[exp.Join]: 3739 return iter(self._parse_join, None) 3740 3741 # https://duckdb.org/docs/sql/statements/pivot 3742 def _parse_simplified_pivot(self) -> exp.Pivot: 3743 def _parse_on() -> t.Optional[exp.Expression]: 3744 this = self._parse_bitwise() 3745 return self._parse_in(this) if self._match(TokenType.IN) else this 3746 3747 this = self._parse_table() 3748 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3749 using = self._match(TokenType.USING) and self._parse_csv( 3750 lambda: self._parse_alias(self._parse_function()) 3751 ) 3752 group = self._parse_group() 3753 return self.expression( 3754 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3755 ) 3756 3757 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3758 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3759 this = self._parse_select_or_expression() 3760 3761 self._match(TokenType.ALIAS) 3762 alias = self._parse_field() 3763 if alias: 3764 return self.expression(exp.PivotAlias, this=this, alias=alias) 3765 3766 return this 3767 3768 value = self._parse_column() 3769 3770 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3771 self.raise_error("Expecting IN (") 3772 3773 if self._match(TokenType.ANY): 3774 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3775 else: 3776 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3777 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3778 3779 self._match_r_paren() 3780 return expr 3781 3782 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3783 index = self._index 3784 include_nulls = None 3785 3786 if self._match(TokenType.PIVOT): 3787 unpivot = False 3788 elif self._match(TokenType.UNPIVOT): 3789 unpivot = True 3790 3791 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3792 if self._match_text_seq("INCLUDE", "NULLS"): 3793 include_nulls = True 3794 elif self._match_text_seq("EXCLUDE", "NULLS"): 3795 include_nulls = False 3796 else: 3797 return None 3798 3799 expressions = [] 3800 3801 if not self._match(TokenType.L_PAREN): 3802 self._retreat(index) 3803 return None 3804 3805 if unpivot: 3806 expressions = self._parse_csv(self._parse_column) 3807 else: 3808 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3809 3810 if not expressions: 3811 self.raise_error("Failed to parse PIVOT's aggregation list") 3812 3813 if not self._match(TokenType.FOR): 3814 self.raise_error("Expecting FOR") 3815 3816 field = self._parse_pivot_in() 3817 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3818 self._parse_bitwise 3819 ) 3820 3821 self._match_r_paren() 3822 3823 pivot 
= self.expression( 3824 exp.Pivot, 3825 expressions=expressions, 3826 field=field, 3827 unpivot=unpivot, 3828 include_nulls=include_nulls, 3829 default_on_null=default_on_null, 3830 ) 3831 3832 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3833 pivot.set("alias", self._parse_table_alias()) 3834 3835 if not unpivot: 3836 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3837 3838 columns: t.List[exp.Expression] = [] 3839 for fld in pivot.args["field"].expressions: 3840 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3841 for name in names: 3842 if self.PREFIXED_PIVOT_COLUMNS: 3843 name = f"{name}_{field_name}" if name else field_name 3844 else: 3845 name = f"{field_name}_{name}" if name else field_name 3846 3847 columns.append(exp.to_identifier(name)) 3848 3849 pivot.set("columns", columns) 3850 3851 return pivot 3852 3853 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3854 return [agg.alias for agg in aggregations] 3855 3856 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3857 if not skip_where_token and not self._match(TokenType.PREWHERE): 3858 return None 3859 3860 return self.expression( 3861 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3862 ) 3863 3864 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3865 if not skip_where_token and not self._match(TokenType.WHERE): 3866 return None 3867 3868 return self.expression( 3869 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3870 ) 3871 3872 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3873 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3874 return None 3875 3876 elements: t.Dict[str, t.Any] = defaultdict(list) 3877 3878 if self._match(TokenType.ALL): 3879 elements["all"] = True 3880 elif self._match(TokenType.DISTINCT): 3881 elements["all"] = False 3882 3883 while True: 3884 expressions = self._parse_csv( 3885 lambda: None 3886 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3887 else self._parse_assignment() 3888 ) 3889 if expressions: 3890 elements["expressions"].extend(expressions) 3891 3892 grouping_sets = self._parse_grouping_sets() 3893 if grouping_sets: 3894 elements["grouping_sets"].extend(grouping_sets) 3895 3896 rollup = None 3897 cube = None 3898 totals = None 3899 3900 index = self._index 3901 with_ = self._match(TokenType.WITH) 3902 if self._match(TokenType.ROLLUP): 3903 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3904 elements["rollup"].extend(ensure_list(rollup)) 3905 3906 if self._match(TokenType.CUBE): 3907 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3908 elements["cube"].extend(ensure_list(cube)) 3909 3910 if self._match_text_seq("TOTALS"): 3911 totals = True 3912 elements["totals"] = True # type: ignore 3913 3914 if not (grouping_sets or rollup or cube or totals): 3915 if with_: 3916 self._retreat(index) 3917 break 3918 3919 return self.expression(exp.Group, **elements) # type: ignore 3920 3921 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3922 if not self._match(TokenType.GROUPING_SETS): 3923 return None 3924 3925 return self._parse_wrapped_csv(self._parse_grouping_set) 3926 3927 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3928 if self._match(TokenType.L_PAREN): 3929 grouping_set = self._parse_csv(self._parse_column) 3930 
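            # Illustrative example (comment added for clarity, not in the upstream source):
            # for GROUP BY GROUPING SETS ((a, b), c), the parenthesized (a, b) takes this
            # branch and becomes an exp.Tuple, while the bare c is handled by the
            # _parse_column() fallback below.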
self._match_r_paren() 3931 return self.expression(exp.Tuple, expressions=grouping_set) 3932 3933 return self._parse_column() 3934 3935 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3936 if not skip_having_token and not self._match(TokenType.HAVING): 3937 return None 3938 return self.expression(exp.Having, this=self._parse_assignment()) 3939 3940 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3941 if not self._match(TokenType.QUALIFY): 3942 return None 3943 return self.expression(exp.Qualify, this=self._parse_assignment()) 3944 3945 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3946 if skip_start_token: 3947 start = None 3948 elif self._match(TokenType.START_WITH): 3949 start = self._parse_assignment() 3950 else: 3951 return None 3952 3953 self._match(TokenType.CONNECT_BY) 3954 nocycle = self._match_text_seq("NOCYCLE") 3955 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3956 exp.Prior, this=self._parse_bitwise() 3957 ) 3958 connect = self._parse_assignment() 3959 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3960 3961 if not start and self._match(TokenType.START_WITH): 3962 start = self._parse_assignment() 3963 3964 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3965 3966 def _parse_name_as_expression(self) -> exp.Alias: 3967 return self.expression( 3968 exp.Alias, 3969 alias=self._parse_id_var(any_token=True), 3970 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3971 ) 3972 3973 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3974 if self._match_text_seq("INTERPOLATE"): 3975 return self._parse_wrapped_csv(self._parse_name_as_expression) 3976 return None 3977 3978 def _parse_order( 3979 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3980 ) -> t.Optional[exp.Expression]: 3981 siblings = None 3982 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3983 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3984 return this 3985 3986 siblings = True 3987 3988 return self.expression( 3989 exp.Order, 3990 this=this, 3991 expressions=self._parse_csv(self._parse_ordered), 3992 interpolate=self._parse_interpolate(), 3993 siblings=siblings, 3994 ) 3995 3996 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3997 if not self._match(token): 3998 return None 3999 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4000 4001 def _parse_ordered( 4002 self, parse_method: t.Optional[t.Callable] = None 4003 ) -> t.Optional[exp.Ordered]: 4004 this = parse_method() if parse_method else self._parse_assignment() 4005 if not this: 4006 return None 4007 4008 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4009 this = exp.var("ALL") 4010 4011 asc = self._match(TokenType.ASC) 4012 desc = self._match(TokenType.DESC) or (asc and False) 4013 4014 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4015 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4016 4017 nulls_first = is_nulls_first or False 4018 explicitly_null_ordered = is_nulls_first or is_nulls_last 4019 4020 if ( 4021 not explicitly_null_ordered 4022 and ( 4023 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4024 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4025 ) 4026 and self.dialect.NULL_ORDERING != "nulls_are_last" 4027 ): 4028 nulls_first = True 4029 4030 if self._match_text_seq("WITH", "FILL"): 4031 with_fill = 
self.expression( 4032 exp.WithFill, 4033 **{ # type: ignore 4034 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4035 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4036 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4037 }, 4038 ) 4039 else: 4040 with_fill = None 4041 4042 return self.expression( 4043 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4044 ) 4045 4046 def _parse_limit( 4047 self, 4048 this: t.Optional[exp.Expression] = None, 4049 top: bool = False, 4050 skip_limit_token: bool = False, 4051 ) -> t.Optional[exp.Expression]: 4052 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4053 comments = self._prev_comments 4054 if top: 4055 limit_paren = self._match(TokenType.L_PAREN) 4056 expression = self._parse_term() if limit_paren else self._parse_number() 4057 4058 if limit_paren: 4059 self._match_r_paren() 4060 else: 4061 expression = self._parse_term() 4062 4063 if self._match(TokenType.COMMA): 4064 offset = expression 4065 expression = self._parse_term() 4066 else: 4067 offset = None 4068 4069 limit_exp = self.expression( 4070 exp.Limit, 4071 this=this, 4072 expression=expression, 4073 offset=offset, 4074 comments=comments, 4075 expressions=self._parse_limit_by(), 4076 ) 4077 4078 return limit_exp 4079 4080 if self._match(TokenType.FETCH): 4081 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4082 direction = self._prev.text.upper() if direction else "FIRST" 4083 4084 count = self._parse_field(tokens=self.FETCH_TOKENS) 4085 percent = self._match(TokenType.PERCENT) 4086 4087 self._match_set((TokenType.ROW, TokenType.ROWS)) 4088 4089 only = self._match_text_seq("ONLY") 4090 with_ties = self._match_text_seq("WITH", "TIES") 4091 4092 if only and with_ties: 4093 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4094 4095 return self.expression( 4096 exp.Fetch, 4097 direction=direction, 4098 count=count, 4099 percent=percent, 4100 with_ties=with_ties, 4101 ) 4102 4103 return this 4104 4105 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4106 if not self._match(TokenType.OFFSET): 4107 return this 4108 4109 count = self._parse_term() 4110 self._match_set((TokenType.ROW, TokenType.ROWS)) 4111 4112 return self.expression( 4113 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4114 ) 4115 4116 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4117 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4118 4119 def _parse_locks(self) -> t.List[exp.Lock]: 4120 locks = [] 4121 while True: 4122 if self._match_text_seq("FOR", "UPDATE"): 4123 update = True 4124 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4125 "LOCK", "IN", "SHARE", "MODE" 4126 ): 4127 update = False 4128 else: 4129 break 4130 4131 expressions = None 4132 if self._match_text_seq("OF"): 4133 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4134 4135 wait: t.Optional[bool | exp.Expression] = None 4136 if self._match_text_seq("NOWAIT"): 4137 wait = True 4138 elif self._match_text_seq("WAIT"): 4139 wait = self._parse_primary() 4140 elif self._match_text_seq("SKIP", "LOCKED"): 4141 wait = False 4142 4143 locks.append( 4144 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4145 ) 4146 4147 return locks 4148 4149 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4150 
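        # Illustrative behavior (comment added for clarity, not in the upstream source):
        # for SELECT 1 UNION SELECT 2, the loop below wraps the left-hand query into an
        # exp.Union node; `distinct` ends up True unless an explicit ALL follows the
        # operator, and BY NAME (e.g. DuckDB's UNION BY NAME) sets the by_name flag.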
while this and self._match_set(self.SET_OPERATIONS): 4151 token_type = self._prev.token_type 4152 4153 if token_type == TokenType.UNION: 4154 operation: t.Type[exp.SetOperation] = exp.Union 4155 elif token_type == TokenType.EXCEPT: 4156 operation = exp.Except 4157 else: 4158 operation = exp.Intersect 4159 4160 comments = self._prev.comments 4161 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4162 by_name = self._match_text_seq("BY", "NAME") 4163 expression = self._parse_select(nested=True, parse_set_operation=False) 4164 4165 this = self.expression( 4166 operation, 4167 comments=comments, 4168 this=this, 4169 distinct=distinct, 4170 by_name=by_name, 4171 expression=expression, 4172 ) 4173 4174 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4175 expression = this.expression 4176 4177 if expression: 4178 for arg in self.SET_OP_MODIFIERS: 4179 expr = expression.args.get(arg) 4180 if expr: 4181 this.set(arg, expr.pop()) 4182 4183 return this 4184 4185 def _parse_expression(self) -> t.Optional[exp.Expression]: 4186 return self._parse_alias(self._parse_assignment()) 4187 4188 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4189 this = self._parse_disjunction() 4190 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4191 # This allows us to parse <non-identifier token> := <expr> 4192 this = exp.column( 4193 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4194 ) 4195 4196 while self._match_set(self.ASSIGNMENT): 4197 this = self.expression( 4198 self.ASSIGNMENT[self._prev.token_type], 4199 this=this, 4200 comments=self._prev_comments, 4201 expression=self._parse_assignment(), 4202 ) 4203 4204 return this 4205 4206 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4207 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4208 4209 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4210 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4211 4212 def _parse_equality(self) -> t.Optional[exp.Expression]: 4213 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4214 4215 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4216 return self._parse_tokens(self._parse_range, self.COMPARISON) 4217 4218 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4219 this = this or self._parse_bitwise() 4220 negate = self._match(TokenType.NOT) 4221 4222 if self._match_set(self.RANGE_PARSERS): 4223 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4224 if not expression: 4225 return this 4226 4227 this = expression 4228 elif self._match(TokenType.ISNULL): 4229 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4230 4231 # Postgres supports ISNULL and NOTNULL for conditions. 
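        # Illustrative mapping (comment added for clarity, not in the upstream source):
        # `x ISNULL` above is normalized to the AST of `x IS NULL`, and `x NOTNULL`
        # below to `NOT x IS NULL`, so both parse like their standard-form equivalents.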
4232 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4233 if self._match(TokenType.NOTNULL): 4234 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4235 this = self.expression(exp.Not, this=this) 4236 4237 if negate: 4238 this = self.expression(exp.Not, this=this) 4239 4240 if self._match(TokenType.IS): 4241 this = self._parse_is(this) 4242 4243 return this 4244 4245 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4246 index = self._index - 1 4247 negate = self._match(TokenType.NOT) 4248 4249 if self._match_text_seq("DISTINCT", "FROM"): 4250 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4251 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4252 4253 expression = self._parse_null() or self._parse_boolean() 4254 if not expression: 4255 self._retreat(index) 4256 return None 4257 4258 this = self.expression(exp.Is, this=this, expression=expression) 4259 return self.expression(exp.Not, this=this) if negate else this 4260 4261 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4262 unnest = self._parse_unnest(with_alias=False) 4263 if unnest: 4264 this = self.expression(exp.In, this=this, unnest=unnest) 4265 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4266 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4267 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4268 4269 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4270 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4271 else: 4272 this = self.expression(exp.In, this=this, expressions=expressions) 4273 4274 if matched_l_paren: 4275 self._match_r_paren(this) 4276 elif not self._match(TokenType.R_BRACKET, expression=this): 4277 self.raise_error("Expecting ]") 4278 else: 4279 this = self.expression(exp.In, this=this, field=self._parse_field()) 4280 4281 return this 4282 4283 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4284 low = self._parse_bitwise() 4285 self._match(TokenType.AND) 4286 high = self._parse_bitwise() 4287 return self.expression(exp.Between, this=this, low=low, high=high) 4288 4289 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4290 if not self._match(TokenType.ESCAPE): 4291 return this 4292 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4293 4294 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4295 index = self._index 4296 4297 if not self._match(TokenType.INTERVAL) and match_interval: 4298 return None 4299 4300 if self._match(TokenType.STRING, advance=False): 4301 this = self._parse_primary() 4302 else: 4303 this = self._parse_term() 4304 4305 if not this or ( 4306 isinstance(this, exp.Column) 4307 and not this.table 4308 and not this.this.quoted 4309 and this.name.upper() == "IS" 4310 ): 4311 self._retreat(index) 4312 return None 4313 4314 unit = self._parse_function() or ( 4315 not self._match(TokenType.ALIAS, advance=False) 4316 and self._parse_var(any_token=True, upper=True) 4317 ) 4318 4319 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4320 # each INTERVAL expression into this canonical form so it's easy to transpile 4321 if this and this.is_number: 4322 this = exp.Literal.string(this.to_py()) 4323 elif this and this.is_string: 4324 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4325 if 
len(parts) == 1: 4326 if unit: 4327 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4328 self._retreat(self._index - 1) 4329 4330 this = exp.Literal.string(parts[0][0]) 4331 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4332 4333 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4334 unit = self.expression( 4335 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4336 ) 4337 4338 interval = self.expression(exp.Interval, this=this, unit=unit) 4339 4340 index = self._index 4341 self._match(TokenType.PLUS) 4342 4343 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4344 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4345 return self.expression( 4346 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4347 ) 4348 4349 self._retreat(index) 4350 return interval 4351 4352 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4353 this = self._parse_term() 4354 4355 while True: 4356 if self._match_set(self.BITWISE): 4357 this = self.expression( 4358 self.BITWISE[self._prev.token_type], 4359 this=this, 4360 expression=self._parse_term(), 4361 ) 4362 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4363 this = self.expression( 4364 exp.DPipe, 4365 this=this, 4366 expression=self._parse_term(), 4367 safe=not self.dialect.STRICT_STRING_CONCAT, 4368 ) 4369 elif self._match(TokenType.DQMARK): 4370 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4371 elif self._match_pair(TokenType.LT, TokenType.LT): 4372 this = self.expression( 4373 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4374 ) 4375 elif self._match_pair(TokenType.GT, TokenType.GT): 4376 this = self.expression( 4377 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4378 ) 4379 else: 4380 break 4381 4382 return this 4383 4384 def _parse_term(self) -> t.Optional[exp.Expression]: 4385 this = self._parse_factor() 4386 4387 while self._match_set(self.TERM): 4388 klass = self.TERM[self._prev.token_type] 4389 comments = self._prev_comments 4390 expression = self._parse_factor() 4391 4392 this = self.expression(klass, this=this, comments=comments, expression=expression) 4393 4394 if isinstance(this, exp.Collate): 4395 expr = this.expression 4396 4397 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4398 # fallback to Identifier / Var 4399 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4400 ident = expr.this 4401 if isinstance(ident, exp.Identifier): 4402 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4403 4404 return this 4405 4406 def _parse_factor(self) -> t.Optional[exp.Expression]: 4407 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4408 this = parse_method() 4409 4410 while self._match_set(self.FACTOR): 4411 klass = self.FACTOR[self._prev.token_type] 4412 comments = self._prev_comments 4413 expression = parse_method() 4414 4415 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4416 self._retreat(self._index - 1) 4417 return this 4418 4419 this = self.expression(klass, this=this, comments=comments, expression=expression) 4420 4421 if isinstance(this, exp.Div): 4422 this.args["typed"] = self.dialect.TYPED_DIVISION 4423 this.args["safe"] = self.dialect.SAFE_DIVISION 4424 4425 return this 4426 4427 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4428 return 
self._parse_tokens(self._parse_unary, self.EXPONENT) 4429 4430 def _parse_unary(self) -> t.Optional[exp.Expression]: 4431 if self._match_set(self.UNARY_PARSERS): 4432 return self.UNARY_PARSERS[self._prev.token_type](self) 4433 return self._parse_at_time_zone(self._parse_type()) 4434 4435 def _parse_type( 4436 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4437 ) -> t.Optional[exp.Expression]: 4438 interval = parse_interval and self._parse_interval() 4439 if interval: 4440 return interval 4441 4442 index = self._index 4443 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4444 4445 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4446 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4447 if isinstance(data_type, exp.Cast): 4448 # This constructor can contain ops directly after it, for instance struct unnesting: 4449 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4450 return self._parse_column_ops(data_type) 4451 4452 if data_type: 4453 index2 = self._index 4454 this = self._parse_primary() 4455 4456 if isinstance(this, exp.Literal): 4457 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4458 if parser: 4459 return parser(self, this, data_type) 4460 4461 return self.expression(exp.Cast, this=this, to=data_type) 4462 4463 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4464 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4465 # 4466 # If the index difference here is greater than 1, that means the parser itself must have 4467 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4468 # 4469 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4470 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4471 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4472 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4473 # 4474 # In these cases, we don't really want to return the converted type, but instead retreat 4475 # and try to parse a Column or Identifier in the section below.
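            # Concrete illustration of the index arithmetic (comment added for clarity,
            # reusing the example above): DECIMAL(38, 0) consumes the tokens DECIMAL, (,
            # 38, the comma, 0 and ), so index2 - index is well above 1; a bare DECIMAL
            # expanded by a TYPE_CONVERTERS callable consumes only the DECIMAL keyword
            # itself, leaving the difference at exactly 1.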
4476 if data_type.expressions and index2 - index > 1: 4477 self._retreat(index2) 4478 return self._parse_column_ops(data_type) 4479 4480 self._retreat(index) 4481 4482 if fallback_to_identifier: 4483 return self._parse_id_var() 4484 4485 this = self._parse_column() 4486 return this and self._parse_column_ops(this) 4487 4488 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4489 this = self._parse_type() 4490 if not this: 4491 return None 4492 4493 if isinstance(this, exp.Column) and not this.table: 4494 this = exp.var(this.name.upper()) 4495 4496 return self.expression( 4497 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4498 ) 4499 4500 def _parse_types( 4501 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4502 ) -> t.Optional[exp.Expression]: 4503 index = self._index 4504 4505 this: t.Optional[exp.Expression] = None 4506 prefix = self._match_text_seq("SYSUDTLIB", ".") 4507 4508 if not self._match_set(self.TYPE_TOKENS): 4509 identifier = allow_identifiers and self._parse_id_var( 4510 any_token=False, tokens=(TokenType.VAR,) 4511 ) 4512 if isinstance(identifier, exp.Identifier): 4513 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4514 4515 if len(tokens) != 1: 4516 self.raise_error("Unexpected identifier", self._prev) 4517 4518 if tokens[0].token_type in self.TYPE_TOKENS: 4519 self._prev = tokens[0] 4520 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4521 type_name = identifier.name 4522 4523 while self._match(TokenType.DOT): 4524 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4525 4526 this = exp.DataType.build(type_name, udt=True) 4527 else: 4528 self._retreat(self._index - 1) 4529 return None 4530 else: 4531 return None 4532 4533 type_token = self._prev.token_type 4534 4535 if type_token == TokenType.PSEUDO_TYPE: 4536 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4537 4538 if type_token == TokenType.OBJECT_IDENTIFIER: 4539 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4540 4541 # https://materialize.com/docs/sql/types/map/ 4542 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4543 key_type = self._parse_types( 4544 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4545 ) 4546 if not self._match(TokenType.FARROW): 4547 self._retreat(index) 4548 return None 4549 4550 value_type = self._parse_types( 4551 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4552 ) 4553 if not self._match(TokenType.R_BRACKET): 4554 self._retreat(index) 4555 return None 4556 4557 return exp.DataType( 4558 this=exp.DataType.Type.MAP, 4559 expressions=[key_type, value_type], 4560 nested=True, 4561 prefix=prefix, 4562 ) 4563 4564 nested = type_token in self.NESTED_TYPE_TOKENS 4565 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4566 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4567 expressions = None 4568 maybe_func = False 4569 4570 if self._match(TokenType.L_PAREN): 4571 if is_struct: 4572 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4573 elif nested: 4574 expressions = self._parse_csv( 4575 lambda: self._parse_types( 4576 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4577 ) 4578 ) 4579 elif type_token in self.ENUM_TYPE_TOKENS: 4580 expressions = self._parse_csv(self._parse_equality) 4581 elif is_aggregate: 4582 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4583 
any_token=False, tokens=(TokenType.VAR,) 4584 ) 4585 if not func_or_ident or not self._match(TokenType.COMMA): 4586 return None 4587 expressions = self._parse_csv( 4588 lambda: self._parse_types( 4589 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4590 ) 4591 ) 4592 expressions.insert(0, func_or_ident) 4593 else: 4594 expressions = self._parse_csv(self._parse_type_size) 4595 4596 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4597 if type_token == TokenType.VECTOR and len(expressions) == 2: 4598 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4599 4600 if not expressions or not self._match(TokenType.R_PAREN): 4601 self._retreat(index) 4602 return None 4603 4604 maybe_func = True 4605 4606 values: t.Optional[t.List[exp.Expression]] = None 4607 4608 if nested and self._match(TokenType.LT): 4609 if is_struct: 4610 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4611 else: 4612 expressions = self._parse_csv( 4613 lambda: self._parse_types( 4614 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4615 ) 4616 ) 4617 4618 if not self._match(TokenType.GT): 4619 self.raise_error("Expecting >") 4620 4621 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4622 values = self._parse_csv(self._parse_assignment) 4623 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4624 4625 if type_token in self.TIMESTAMPS: 4626 if self._match_text_seq("WITH", "TIME", "ZONE"): 4627 maybe_func = False 4628 tz_type = ( 4629 exp.DataType.Type.TIMETZ 4630 if type_token in self.TIMES 4631 else exp.DataType.Type.TIMESTAMPTZ 4632 ) 4633 this = exp.DataType(this=tz_type, expressions=expressions) 4634 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4635 maybe_func = False 4636 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4637 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4638 maybe_func = False 4639 elif type_token == TokenType.INTERVAL: 4640 unit = self._parse_var(upper=True) 4641 if unit: 4642 if self._match_text_seq("TO"): 4643 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4644 4645 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4646 else: 4647 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4648 4649 if maybe_func and check_func: 4650 index2 = self._index 4651 peek = self._parse_string() 4652 4653 if not peek: 4654 self._retreat(index) 4655 return None 4656 4657 self._retreat(index2) 4658 4659 if not this: 4660 if self._match_text_seq("UNSIGNED"): 4661 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4662 if not unsigned_type_token: 4663 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4664 4665 type_token = unsigned_type_token or type_token 4666 4667 this = exp.DataType( 4668 this=exp.DataType.Type[type_token.value], 4669 expressions=expressions, 4670 nested=nested, 4671 prefix=prefix, 4672 ) 4673 4674 # Empty arrays/structs are allowed 4675 if values is not None: 4676 cls = exp.Struct if is_struct else exp.Array 4677 this = exp.cast(cls(expressions=values), this, copy=False) 4678 4679 elif expressions: 4680 this.set("expressions", expressions) 4681 4682 # https://materialize.com/docs/sql/types/list/#type-name 4683 while self._match(TokenType.LIST): 4684 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4685 4686 index = self._index 4687 4688 # Postgres 
supports the INT ARRAY[3] syntax as a synonym for INT[3] 4689 matched_array = self._match(TokenType.ARRAY) 4690 4691 while self._curr: 4692 datatype_token = self._prev.token_type 4693 matched_l_bracket = self._match(TokenType.L_BRACKET) 4694 if not matched_l_bracket and not matched_array: 4695 break 4696 4697 matched_array = False 4698 values = self._parse_csv(self._parse_assignment) or None 4699 if ( 4700 values 4701 and not schema 4702 and ( 4703 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4704 ) 4705 ): 4706 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4707 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4708 self._retreat(index) 4709 break 4710 4711 this = exp.DataType( 4712 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4713 ) 4714 self._match(TokenType.R_BRACKET) 4715 4716 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4717 converter = self.TYPE_CONVERTERS.get(this.this) 4718 if converter: 4719 this = converter(t.cast(exp.DataType, this)) 4720 4721 return this 4722 4723 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4724 index = self._index 4725 4726 if ( 4727 self._curr 4728 and self._next 4729 and self._curr.token_type in self.TYPE_TOKENS 4730 and self._next.token_type in self.TYPE_TOKENS 4731 ): 4732 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4733 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4734 this = self._parse_id_var() 4735 else: 4736 this = ( 4737 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4738 or self._parse_id_var() 4739 ) 4740 4741 self._match(TokenType.COLON) 4742 4743 if ( 4744 type_required 4745 and not isinstance(this, exp.DataType) 4746 and not self._match_set(self.TYPE_TOKENS, advance=False) 4747 ): 4748 self._retreat(index) 4749 return self._parse_types() 4750 4751 return self._parse_column_def(this) 4752 4753 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4754 if not self._match_text_seq("AT", "TIME", "ZONE"): 4755 return this 4756 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4757 4758 def _parse_column(self) -> t.Optional[exp.Expression]: 4759 this = self._parse_column_reference() 4760 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4761 4762 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4763 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4764 4765 return column 4766 4767 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4768 this = self._parse_field() 4769 if ( 4770 not this 4771 and self._match(TokenType.VALUES, advance=False) 4772 and self.VALUES_FOLLOWED_BY_PAREN 4773 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4774 ): 4775 this = self._parse_id_var() 4776 4777 if isinstance(this, exp.Identifier): 4778 # We bubble up comments from the Identifier to the Column 4779 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4780 4781 return this 4782 4783 def _parse_colon_as_variant_extract( 4784 self, this: t.Optional[exp.Expression] 4785 ) -> t.Optional[exp.Expression]: 4786 casts = [] 4787 json_path = [] 4788 4789 while self._match(TokenType.COLON): 4790 start_index = self._index 4791 
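            # Illustrative input for this loop (hypothetical, Snowflake-style): given
            # col:a.b::int, each piece following a `:` is collected into json_path,
            # while any trailing ::<type> casts are peeled off into `casts` and
            # re-applied around the exp.JSONExtract built at the end of this method.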
4792 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4793 path = self._parse_column_ops( 4794 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4795 ) 4796 4797 # The cast :: operator has a lower precedence than the extraction operator :, so 4798 # we rearrange the AST appropriately to avoid casting the JSON path 4799 while isinstance(path, exp.Cast): 4800 casts.append(path.to) 4801 path = path.this 4802 4803 if casts: 4804 dcolon_offset = next( 4805 i 4806 for i, t in enumerate(self._tokens[start_index:]) 4807 if t.token_type == TokenType.DCOLON 4808 ) 4809 end_token = self._tokens[start_index + dcolon_offset - 1] 4810 else: 4811 end_token = self._prev 4812 4813 if path: 4814 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4815 4816 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4817 # Databricks transforms it back to the colon/dot notation 4818 if json_path: 4819 this = self.expression( 4820 exp.JSONExtract, 4821 this=this, 4822 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4823 variant_extract=True, 4824 ) 4825 4826 while casts: 4827 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4828 4829 return this 4830 4831 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4832 return self._parse_types() 4833 4834 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4835 this = self._parse_bracket(this) 4836 4837 while self._match_set(self.COLUMN_OPERATORS): 4838 op_token = self._prev.token_type 4839 op = self.COLUMN_OPERATORS.get(op_token) 4840 4841 if op_token == TokenType.DCOLON: 4842 field = self._parse_dcolon() 4843 if not field: 4844 self.raise_error("Expected type") 4845 elif op and self._curr: 4846 field = self._parse_column_reference() 4847 else: 4848 field = self._parse_field(any_token=True, anonymous_func=True) 4849 4850 if isinstance(field, exp.Func) and this: 4851 # bigquery allows function calls like x.y.count(...) 4852 # SAFE.SUBSTR(...) 
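                # e.g. (illustrative, not in the upstream source) for a.b.substr(1), the
                # exp.Column parsed so far for a.b is rebuilt into a plain Dot chain so
                # the whole dotted reference parses as a single function call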
4853 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4854 this = exp.replace_tree( 4855 this, 4856 lambda n: ( 4857 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4858 if n.table 4859 else n.this 4860 ) 4861 if isinstance(n, exp.Column) 4862 else n, 4863 ) 4864 4865 if op: 4866 this = op(self, this, field) 4867 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4868 this = self.expression( 4869 exp.Column, 4870 this=field, 4871 table=this.this, 4872 db=this.args.get("table"), 4873 catalog=this.args.get("db"), 4874 ) 4875 else: 4876 this = self.expression(exp.Dot, this=this, expression=field) 4877 4878 this = self._parse_bracket(this) 4879 4880 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4881 4882 def _parse_primary(self) -> t.Optional[exp.Expression]: 4883 if self._match_set(self.PRIMARY_PARSERS): 4884 token_type = self._prev.token_type 4885 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4886 4887 if token_type == TokenType.STRING: 4888 expressions = [primary] 4889 while self._match(TokenType.STRING): 4890 expressions.append(exp.Literal.string(self._prev.text)) 4891 4892 if len(expressions) > 1: 4893 return self.expression(exp.Concat, expressions=expressions) 4894 4895 return primary 4896 4897 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4898 return exp.Literal.number(f"0.{self._prev.text}") 4899 4900 if self._match(TokenType.L_PAREN): 4901 comments = self._prev_comments 4902 query = self._parse_select() 4903 4904 if query: 4905 expressions = [query] 4906 else: 4907 expressions = self._parse_expressions() 4908 4909 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4910 4911 if not this and self._match(TokenType.R_PAREN, advance=False): 4912 this = self.expression(exp.Tuple) 4913 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4914 this = self._parse_subquery(this=this, parse_alias=False) 4915 elif isinstance(this, exp.Subquery): 4916 this = self._parse_subquery( 4917 this=self._parse_set_operations(this), parse_alias=False 4918 ) 4919 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4920 this = self.expression(exp.Tuple, expressions=expressions) 4921 else: 4922 this = self.expression(exp.Paren, this=this) 4923 4924 if this: 4925 this.add_comments(comments) 4926 4927 self._match_r_paren(expression=this) 4928 return this 4929 4930 return None 4931 4932 def _parse_field( 4933 self, 4934 any_token: bool = False, 4935 tokens: t.Optional[t.Collection[TokenType]] = None, 4936 anonymous_func: bool = False, 4937 ) -> t.Optional[exp.Expression]: 4938 if anonymous_func: 4939 field = ( 4940 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4941 or self._parse_primary() 4942 ) 4943 else: 4944 field = self._parse_primary() or self._parse_function( 4945 anonymous=anonymous_func, any_token=any_token 4946 ) 4947 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4948 4949 def _parse_function( 4950 self, 4951 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4952 anonymous: bool = False, 4953 optional_parens: bool = True, 4954 any_token: bool = False, 4955 ) -> t.Optional[exp.Expression]: 4956 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4957 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4958 fn_syntax = False 4959 if ( 4960 self._match(TokenType.L_BRACE, advance=False) 4961 and self._next 4962 and 
self._next.text.upper() == "FN" 4963 ): 4964 self._advance(2) 4965 fn_syntax = True 4966 4967 func = self._parse_function_call( 4968 functions=functions, 4969 anonymous=anonymous, 4970 optional_parens=optional_parens, 4971 any_token=any_token, 4972 ) 4973 4974 if fn_syntax: 4975 self._match(TokenType.R_BRACE) 4976 4977 return func 4978 4979 def _parse_function_call( 4980 self, 4981 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4982 anonymous: bool = False, 4983 optional_parens: bool = True, 4984 any_token: bool = False, 4985 ) -> t.Optional[exp.Expression]: 4986 if not self._curr: 4987 return None 4988 4989 comments = self._curr.comments 4990 token_type = self._curr.token_type 4991 this = self._curr.text 4992 upper = this.upper() 4993 4994 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4995 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4996 self._advance() 4997 return self._parse_window(parser(self)) 4998 4999 if not self._next or self._next.token_type != TokenType.L_PAREN: 5000 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5001 self._advance() 5002 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5003 5004 return None 5005 5006 if any_token: 5007 if token_type in self.RESERVED_TOKENS: 5008 return None 5009 elif token_type not in self.FUNC_TOKENS: 5010 return None 5011 5012 self._advance(2) 5013 5014 parser = self.FUNCTION_PARSERS.get(upper) 5015 if parser and not anonymous: 5016 this = parser(self) 5017 else: 5018 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5019 5020 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5021 this = self.expression(subquery_predicate, this=self._parse_select()) 5022 self._match_r_paren() 5023 return this 5024 5025 if functions is None: 5026 functions = self.FUNCTIONS 5027 5028 function = functions.get(upper) 5029 5030 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5031 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5032 5033 if alias: 5034 args = self._kv_to_prop_eq(args) 5035 5036 if function and not anonymous: 5037 if "dialect" in function.__code__.co_varnames: 5038 func = function(args, dialect=self.dialect) 5039 else: 5040 func = function(args) 5041 5042 func = self.validate_expression(func, args) 5043 if not self.dialect.NORMALIZE_FUNCTIONS: 5044 func.meta["name"] = this 5045 5046 this = func 5047 else: 5048 if token_type == TokenType.IDENTIFIER: 5049 this = exp.Identifier(this=this, quoted=True) 5050 this = self.expression(exp.Anonymous, this=this, expressions=args) 5051 5052 if isinstance(this, exp.Expression): 5053 this.add_comments(comments) 5054 5055 self._match_r_paren(this) 5056 return self._parse_window(this) 5057 5058 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5059 transformed = [] 5060 5061 for e in expressions: 5062 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5063 if isinstance(e, exp.Alias): 5064 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5065 5066 if not isinstance(e, exp.PropertyEQ): 5067 e = self.expression( 5068 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5069 ) 5070 5071 if isinstance(e.this, exp.Column): 5072 e.this.replace(e.this.this) 5073 5074 transformed.append(e) 5075 5076 return transformed 5077 5078 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5079 return self._parse_column_def(self._parse_id_var()) 5080 5081 def _parse_user_defined_function( 
5082 self, kind: t.Optional[TokenType] = None 5083 ) -> t.Optional[exp.Expression]: 5084 this = self._parse_id_var() 5085 5086 while self._match(TokenType.DOT): 5087 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5088 5089 if not self._match(TokenType.L_PAREN): 5090 return this 5091 5092 expressions = self._parse_csv(self._parse_function_parameter) 5093 self._match_r_paren() 5094 return self.expression( 5095 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5096 ) 5097 5098 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5099 literal = self._parse_primary() 5100 if literal: 5101 return self.expression(exp.Introducer, this=token.text, expression=literal) 5102 5103 return self.expression(exp.Identifier, this=token.text) 5104 5105 def _parse_session_parameter(self) -> exp.SessionParameter: 5106 kind = None 5107 this = self._parse_id_var() or self._parse_primary() 5108 5109 if this and self._match(TokenType.DOT): 5110 kind = this.name 5111 this = self._parse_var() or self._parse_primary() 5112 5113 return self.expression(exp.SessionParameter, this=this, kind=kind) 5114 5115 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5116 return self._parse_id_var() 5117 5118 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5119 index = self._index 5120 5121 if self._match(TokenType.L_PAREN): 5122 expressions = t.cast( 5123 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5124 ) 5125 5126 if not self._match(TokenType.R_PAREN): 5127 self._retreat(index) 5128 else: 5129 expressions = [self._parse_lambda_arg()] 5130 5131 if self._match_set(self.LAMBDAS): 5132 return self.LAMBDAS[self._prev.token_type](self, expressions) 5133 5134 self._retreat(index) 5135 5136 this: t.Optional[exp.Expression] 5137 5138 if self._match(TokenType.DISTINCT): 5139 this = self.expression( 5140 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5141 ) 5142 else: 5143 this = self._parse_select_or_expression(alias=alias) 5144 5145 return self._parse_limit( 5146 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5147 ) 5148 5149 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5150 index = self._index 5151 if not self._match(TokenType.L_PAREN): 5152 return this 5153 5154 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5155 # expr can be of both types 5156 if self._match_set(self.SELECT_START_TOKENS): 5157 self._retreat(index) 5158 return this 5159 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5160 self._match_r_paren() 5161 return self.expression(exp.Schema, this=this, expressions=args) 5162 5163 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5164 return self._parse_column_def(self._parse_field(any_token=True)) 5165 5166 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5167 # column defs are not really columns, they're identifiers 5168 if isinstance(this, exp.Column): 5169 this = this.this 5170 5171 kind = self._parse_types(schema=True) 5172 5173 if self._match_text_seq("FOR", "ORDINALITY"): 5174 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5175 5176 constraints: t.List[exp.Expression] = [] 5177 5178 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5179 ("ALIAS", "MATERIALIZED") 5180 ): 5181 persisted = self._prev.text.upper() == "MATERIALIZED" 5182 constraints.append( 5183 self.expression( 5184 exp.ComputedColumnConstraint, 5185 this=self._parse_assignment(), 5186 persisted=persisted or self._match_text_seq("PERSISTED"), 5187 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5188 ) 5189 ) 5190 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5191 self._match(TokenType.ALIAS) 5192 constraints.append( 5193 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5194 ) 5195 5196 while True: 5197 constraint = self._parse_column_constraint() 5198 if not constraint: 5199 break 5200 constraints.append(constraint) 5201 5202 if not kind and not constraints: 5203 return this 5204 5205 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5206 5207 def _parse_auto_increment( 5208 self, 5209 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5210 start = None 5211 increment = None 5212 5213 if self._match(TokenType.L_PAREN, advance=False): 5214 args = self._parse_wrapped_csv(self._parse_bitwise) 5215 start = seq_get(args, 0) 5216 increment = seq_get(args, 1) 5217 elif self._match_text_seq("START"): 5218 start = self._parse_bitwise() 5219 self._match_text_seq("INCREMENT") 5220 increment = self._parse_bitwise() 5221 5222 if start and increment: 5223 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5224 5225 return exp.AutoIncrementColumnConstraint() 5226 5227 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5228 if not self._match_text_seq("REFRESH"): 5229 self._retreat(self._index - 1) 5230 return None 5231 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5232 5233 def _parse_compress(self) -> exp.CompressColumnConstraint: 5234 if self._match(TokenType.L_PAREN, advance=False): 5235 return self.expression( 5236 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5237 ) 5238 5239 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5240 5241 def _parse_generated_as_identity( 5242 self, 5243 ) -> ( 5244 exp.GeneratedAsIdentityColumnConstraint 5245 | exp.ComputedColumnConstraint 5246 | exp.GeneratedAsRowColumnConstraint 5247 ): 5248 if self._match_text_seq("BY", "DEFAULT"): 5249 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5250 this = self.expression( 5251 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5252 ) 5253 else: 5254 self._match_text_seq("ALWAYS") 5255 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5256 5257 self._match(TokenType.ALIAS) 5258 5259 if self._match_text_seq("ROW"): 5260 start = self._match_text_seq("START") 5261 if not start: 5262 self._match(TokenType.END) 5263 hidden = self._match_text_seq("HIDDEN") 5264 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5265 5266 identity = self._match_text_seq("IDENTITY") 5267 5268 if self._match(TokenType.L_PAREN): 5269 if self._match(TokenType.START_WITH): 5270 this.set("start", self._parse_bitwise()) 5271 if self._match_text_seq("INCREMENT", "BY"): 5272 this.set("increment", self._parse_bitwise()) 5273 if self._match_text_seq("MINVALUE"): 5274 this.set("minvalue", self._parse_bitwise()) 5275 if self._match_text_seq("MAXVALUE"): 5276 this.set("maxvalue", self._parse_bitwise()) 5277 5278 if self._match_text_seq("CYCLE"): 5279 this.set("cycle", True) 5280 elif self._match_text_seq("NO", "CYCLE"): 5281 this.set("cycle", False) 5282 5283 if not identity: 5284 this.set("expression", self._parse_range()) 5285 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5286 args = self._parse_csv(self._parse_bitwise) 5287 this.set("start", seq_get(args, 0)) 5288 this.set("increment", seq_get(args, 1)) 5289 5290 self._match_r_paren() 5291 5292 return this 5293 5294 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5295 self._match_text_seq("LENGTH") 5296 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5297 5298 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5299 if self._match_text_seq("NULL"): 5300 return self.expression(exp.NotNullColumnConstraint) 5301 if self._match_text_seq("CASESPECIFIC"): 5302 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5303 if self._match_text_seq("FOR", "REPLICATION"): 5304 return self.expression(exp.NotForReplicationColumnConstraint) 5305 return None 5306 5307 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5308 if self._match(TokenType.CONSTRAINT): 5309 this = self._parse_id_var() 5310 else: 5311 this = None 5312 5313 if self._match_texts(self.CONSTRAINT_PARSERS): 5314 return self.expression( 5315 exp.ColumnConstraint, 5316 this=this, 5317 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5318 ) 5319 5320 return this 5321 5322 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5323 if not self._match(TokenType.CONSTRAINT): 5324 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5325 5326 return self.expression( 5327 exp.Constraint, 5328 this=self._parse_id_var(), 5329 expressions=self._parse_unnamed_constraints(), 5330 ) 5331 5332 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5333 constraints = [] 5334 while True: 5335 constraint = self._parse_unnamed_constraint() or self._parse_function() 5336 if not constraint: 5337 break 5338 constraints.append(constraint) 5339 5340 return constraints 5341 5342 def _parse_unnamed_constraint( 5343 self, constraints: t.Optional[t.Collection[str]] = None 5344 ) -> t.Optional[exp.Expression]: 5345 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5346 constraints or self.CONSTRAINT_PARSERS 5347 ): 5348 return None 5349 5350 constraint = self._prev.text.upper() 5351 if constraint not in self.CONSTRAINT_PARSERS: 5352 
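# The keyword may have been matched from a caller-supplied collection (e.g. SCHEMA_UNNAMED_CONSTRAINTS) without having a dedicated entry in CONSTRAINT_PARSERS, hence the explicit error below.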
self.raise_error(f"No parser found for schema constraint {constraint}.") 5353 5354 return self.CONSTRAINT_PARSERS[constraint](self) 5355 5356 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5357 return self._parse_id_var(any_token=False) 5358 5359 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5360 self._match_text_seq("KEY") 5361 return self.expression( 5362 exp.UniqueColumnConstraint, 5363 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5364 this=self._parse_schema(self._parse_unique_key()), 5365 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5366 on_conflict=self._parse_on_conflict(), 5367 ) 5368 5369 def _parse_key_constraint_options(self) -> t.List[str]: 5370 options = [] 5371 while True: 5372 if not self._curr: 5373 break 5374 5375 if self._match(TokenType.ON): 5376 action = None 5377 on = self._advance_any() and self._prev.text 5378 5379 if self._match_text_seq("NO", "ACTION"): 5380 action = "NO ACTION" 5381 elif self._match_text_seq("CASCADE"): 5382 action = "CASCADE" 5383 elif self._match_text_seq("RESTRICT"): 5384 action = "RESTRICT" 5385 elif self._match_pair(TokenType.SET, TokenType.NULL): 5386 action = "SET NULL" 5387 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5388 action = "SET DEFAULT" 5389 else: 5390 self.raise_error("Invalid key constraint") 5391 5392 options.append(f"ON {on} {action}") 5393 else: 5394 var = self._parse_var_from_options( 5395 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5396 ) 5397 if not var: 5398 break 5399 options.append(var.name) 5400 5401 return options 5402 5403 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5404 if match and not self._match(TokenType.REFERENCES): 5405 return None 5406 5407 expressions = None 5408 this = self._parse_table(schema=True) 5409 options = self._parse_key_constraint_options() 5410 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5411 5412 def _parse_foreign_key(self) -> exp.ForeignKey: 5413 expressions = self._parse_wrapped_id_vars() 5414 reference = self._parse_references() 5415 options = {} 5416 5417 while self._match(TokenType.ON): 5418 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5419 self.raise_error("Expected DELETE or UPDATE") 5420 5421 kind = self._prev.text.lower() 5422 5423 if self._match_text_seq("NO", "ACTION"): 5424 action = "NO ACTION" 5425 elif self._match(TokenType.SET): 5426 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5427 action = "SET " + self._prev.text.upper() 5428 else: 5429 self._advance() 5430 action = self._prev.text.upper() 5431 5432 options[kind] = action 5433 5434 return self.expression( 5435 exp.ForeignKey, 5436 expressions=expressions, 5437 reference=reference, 5438 **options, # type: ignore 5439 ) 5440 5441 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5442 return self._parse_field() 5443 5444 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5445 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5446 self._retreat(self._index - 1) 5447 return None 5448 5449 id_vars = self._parse_wrapped_id_vars() 5450 return self.expression( 5451 exp.PeriodForSystemTimeConstraint, 5452 this=seq_get(id_vars, 0), 5453 expression=seq_get(id_vars, 1), 5454 ) 5455 5456 def _parse_primary_key( 5457 self, wrapped_optional: bool = False, in_props: bool = False 5458 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5459 desc = ( 5460 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5461 and self._prev.token_type == TokenType.DESC 5462 ) 5463 5464 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5465 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5466 5467 expressions = self._parse_wrapped_csv( 5468 self._parse_primary_key_part, optional=wrapped_optional 5469 ) 5470 options = self._parse_key_constraint_options() 5471 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5472 5473 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5474 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5475 5476 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5477 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5478 return this 5479 5480 bracket_kind = self._prev.token_type 5481 expressions = self._parse_csv( 5482 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5483 ) 5484 5485 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5486 self.raise_error("Expected ]") 5487 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5488 self.raise_error("Expected }") 5489 5490 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5491 if bracket_kind == TokenType.L_BRACE: 5492 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5493 elif not this: 5494 this = build_array_constructor( 5495 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5496 ) 5497 else: 5498 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5499 if constructor_type: 5500 return build_array_constructor( 5501 constructor_type, 5502 args=expressions, 5503 bracket_kind=bracket_kind, 5504 dialect=self.dialect, 5505 ) 5506 5507 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5508 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5509 5510 self._add_comments(this) 5511 return self._parse_bracket(this) 5512 5513 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5514 if self._match(TokenType.COLON): 5515 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5516 return this 5517 5518 def _parse_case(self) -> t.Optional[exp.Expression]: 5519 ifs = [] 5520 default = None 5521 5522 comments = self._prev_comments 5523 expression = self._parse_assignment() 5524 5525 while self._match(TokenType.WHEN): 5526 this = self._parse_assignment() 5527 self._match(TokenType.THEN) 5528 then = self._parse_assignment() 5529 ifs.append(self.expression(exp.If, this=this, true=then)) 5530 5531 if self._match(TokenType.ELSE): 5532 default = self._parse_assignment() 5533 5534 if not self._match(TokenType.END): 5535 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5536 default = exp.column("interval") 5537 else: 5538 self.raise_error("Expected END after CASE", self._prev) 5539 5540 return self.expression( 5541 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5542 ) 5543 5544 def _parse_if(self) -> t.Optional[exp.Expression]: 5545 if self._match(TokenType.L_PAREN): 5546 args = self._parse_csv(self._parse_assignment) 5547 this = self.validate_expression(exp.If.from_arg_list(args), args) 5548 self._match_r_paren() 5549 else: 5550 index = self._index - 1 5551 5552 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5553 return self._parse_as_command(self._prev) 5554 5555 condition = self._parse_assignment() 5556 5557 if not condition: 5558 self._retreat(index) 5559 return None 5560 5561 self._match(TokenType.THEN) 5562 true = self._parse_assignment() 5563 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5564 self._match(TokenType.END) 5565 this = self.expression(exp.If, this=condition, true=true, false=false) 5566 5567 return this 5568 5569 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5570 if not self._match_text_seq("VALUE", "FOR"): 5571 self._retreat(self._index - 1) 5572 return None 5573 5574 return self.expression( 5575 exp.NextValueFor, 5576 this=self._parse_column(), 5577 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5578 ) 5579 5580 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5581 this = self._parse_function() or self._parse_var_or_string(upper=True) 5582 5583 if self._match(TokenType.FROM): 5584 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5585 5586 if not self._match(TokenType.COMMA): 5587 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5588 5589 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5590 5591 def _parse_gap_fill(self) -> exp.GapFill: 5592 self._match(TokenType.TABLE) 5593 this = self._parse_table() 5594 5595 self._match(TokenType.COMMA) 5596 args = [this, *self._parse_csv(self._parse_lambda)] 5597 5598 gap_fill = exp.GapFill.from_arg_list(args) 5599 return self.validate_expression(gap_fill, args) 5600 5601 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5602 this = self._parse_assignment() 5603 5604 if not self._match(TokenType.ALIAS): 5605 if self._match(TokenType.COMMA): 5606 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5607 5608 self.raise_error("Expected AS after CAST") 5609 5610 fmt = None 5611 to = self._parse_types() 5612 5613 if self._match(TokenType.FORMAT): 5614 fmt_string = self._parse_string() 5615 fmt = self._parse_at_time_zone(fmt_string) 5616 5617 if not to: 5618 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5619 if to.this in exp.DataType.TEMPORAL_TYPES: 5620 this = self.expression( 5621 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5622 this=this, 5623 format=exp.Literal.string( 5624 format_time( 5625 fmt_string.this if fmt_string else "", 5626 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5627 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5628 ) 5629 ), 5630 safe=safe, 5631 ) 5632 5633 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5634 this.set("zone", fmt.args["zone"]) 5635 return this 5636 elif not to: 5637 self.raise_error("Expected TYPE after CAST") 5638 elif isinstance(to, exp.Identifier): 5639 to = exp.DataType.build(to.name, udt=True) 5640 elif to.this == exp.DataType.Type.CHAR: 5641 if self._match(TokenType.CHARACTER_SET): 5642 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5643 5644 return self.expression( 5645 exp.Cast if strict else exp.TryCast, 5646 this=this, 5647 to=to, 5648 format=fmt, 5649 safe=safe, 5650 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5651 ) 5652 5653 def _parse_string_agg(self) -> exp.Expression: 5654 if self._match(TokenType.DISTINCT): 5655 args: t.List[t.Optional[exp.Expression]] = [ 5656 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5657 ] 5658 if self._match(TokenType.COMMA): 5659 args.extend(self._parse_csv(self._parse_assignment)) 5660 else: 5661 args = self._parse_csv(self._parse_assignment) # type: ignore 5662 5663 index = self._index 5664 if not self._match(TokenType.R_PAREN) and args: 5665 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5666 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5667 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5668 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5669 5670 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5671 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5672 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5673 if not self._match_text_seq("WITHIN", "GROUP"): 5674 self._retreat(index) 5675 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5676 5677 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5678 order = self._parse_order(this=seq_get(args, 0)) 5679 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5680 5681 def _parse_convert( 5682 self, strict: bool, safe: t.Optional[bool] = None 5683 ) -> t.Optional[exp.Expression]: 5684 this = self._parse_bitwise() 5685 5686 if self._match(TokenType.USING): 5687 to: t.Optional[exp.Expression] = self.expression( 5688 exp.CharacterSet, this=self._parse_var() 5689 ) 5690 elif self._match(TokenType.COMMA): 5691 to = self._parse_types() 5692 else: 5693 to = None 5694 5695 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5696 5697 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5698 """ 5699 There are generally two variants of the DECODE function: 5700 5701 - DECODE(bin, charset) 5702 - DECODE(expression, search, result [, search, result] ... [, default]) 5703 5704 The second variant will always be parsed into a CASE expression. Note that NULL 5705 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5706 instead of relying on pattern matching. 
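For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed as
CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.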
5707 """ 5708 args = self._parse_csv(self._parse_assignment) 5709 5710 if len(args) < 3: 5711 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5712 5713 expression, *expressions = args 5714 if not expression: 5715 return None 5716 5717 ifs = [] 5718 for search, result in zip(expressions[::2], expressions[1::2]): 5719 if not search or not result: 5720 return None 5721 5722 if isinstance(search, exp.Literal): 5723 ifs.append( 5724 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5725 ) 5726 elif isinstance(search, exp.Null): 5727 ifs.append( 5728 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5729 ) 5730 else: 5731 cond = exp.or_( 5732 exp.EQ(this=expression.copy(), expression=search), 5733 exp.and_( 5734 exp.Is(this=expression.copy(), expression=exp.Null()), 5735 exp.Is(this=search.copy(), expression=exp.Null()), 5736 copy=False, 5737 ), 5738 copy=False, 5739 ) 5740 ifs.append(exp.If(this=cond, true=result)) 5741 5742 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5743 5744 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5745 self._match_text_seq("KEY") 5746 key = self._parse_column() 5747 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5748 self._match_text_seq("VALUE") 5749 value = self._parse_bitwise() 5750 5751 if not key and not value: 5752 return None 5753 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5754 5755 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5756 if not this or not self._match_text_seq("FORMAT", "JSON"): 5757 return this 5758 5759 return self.expression(exp.FormatJson, this=this) 5760 5761 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5762 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5763 for value in values: 5764 if self._match_text_seq(value, "ON", on): 5765 return f"{value} ON {on}" 5766 5767 return None 5768 5769 @t.overload 5770 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5771 5772 @t.overload 5773 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5774 5775 def _parse_json_object(self, agg=False): 5776 star = self._parse_star() 5777 expressions = ( 5778 [star] 5779 if star 5780 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5781 ) 5782 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5783 5784 unique_keys = None 5785 if self._match_text_seq("WITH", "UNIQUE"): 5786 unique_keys = True 5787 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5788 unique_keys = False 5789 5790 self._match_text_seq("KEYS") 5791 5792 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5793 self._parse_type() 5794 ) 5795 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5796 5797 return self.expression( 5798 exp.JSONObjectAgg if agg else exp.JSONObject, 5799 expressions=expressions, 5800 null_handling=null_handling, 5801 unique_keys=unique_keys, 5802 return_type=return_type, 5803 encoding=encoding, 5804 ) 5805 5806 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5807 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5808 if not self._match_text_seq("NESTED"): 5809 this = self._parse_id_var() 5810 kind = self._parse_types(allow_identifiers=False) 5811 nested = None 5812 else: 5813 this = None 5814 kind = None 5815 nested = True 5816 5817 path = self._match_text_seq("PATH") and self._parse_string() 5818 nested_schema = nested and self._parse_json_schema() 5819 5820 return self.expression( 5821 exp.JSONColumnDef, 5822 this=this, 5823 kind=kind, 5824 path=path, 5825 nested_schema=nested_schema, 5826 ) 5827 5828 def _parse_json_schema(self) -> exp.JSONSchema: 5829 self._match_text_seq("COLUMNS") 5830 return self.expression( 5831 exp.JSONSchema, 5832 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5833 ) 5834 5835 def _parse_json_table(self) -> exp.JSONTable: 5836 this = self._parse_format_json(self._parse_bitwise()) 5837 path = self._match(TokenType.COMMA) and self._parse_string() 5838 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5839 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5840 schema = self._parse_json_schema() 5841 5842 return exp.JSONTable( 5843 this=this, 5844 schema=schema, 5845 path=path, 5846 error_handling=error_handling, 5847 empty_handling=empty_handling, 5848 ) 5849 5850 def _parse_match_against(self) -> exp.MatchAgainst: 5851 expressions = self._parse_csv(self._parse_column) 5852 5853 self._match_text_seq(")", "AGAINST", "(") 5854 5855 this = self._parse_string() 5856 5857 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5858 modifier = "IN NATURAL LANGUAGE MODE" 5859 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5860 modifier = f"{modifier} WITH QUERY EXPANSION" 5861 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5862 modifier = "IN BOOLEAN MODE" 5863 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5864 modifier = "WITH QUERY EXPANSION" 5865 else: 5866 modifier = None 5867 5868 return self.expression( 5869 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5870 ) 5871 5872 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5873 def _parse_open_json(self) -> exp.OpenJSON: 5874 this = self._parse_bitwise() 5875 path = self._match(TokenType.COMMA) and self._parse_string() 5876 5877 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5878 this = self._parse_field(any_token=True) 5879 kind = self._parse_types() 5880 path = 
self._parse_string() 5881 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5882 5883 return self.expression( 5884 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5885 ) 5886 5887 expressions = None 5888 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5889 self._match_l_paren() 5890 expressions = self._parse_csv(_parse_open_json_column_def) 5891 5892 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5893 5894 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5895 args = self._parse_csv(self._parse_bitwise) 5896 5897 if self._match(TokenType.IN): 5898 return self.expression( 5899 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5900 ) 5901 5902 if haystack_first: 5903 haystack = seq_get(args, 0) 5904 needle = seq_get(args, 1) 5905 else: 5906 needle = seq_get(args, 0) 5907 haystack = seq_get(args, 1) 5908 5909 return self.expression( 5910 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5911 ) 5912 5913 def _parse_predict(self) -> exp.Predict: 5914 self._match_text_seq("MODEL") 5915 this = self._parse_table() 5916 5917 self._match(TokenType.COMMA) 5918 self._match_text_seq("TABLE") 5919 5920 return self.expression( 5921 exp.Predict, 5922 this=this, 5923 expression=self._parse_table(), 5924 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5925 ) 5926 5927 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5928 args = self._parse_csv(self._parse_table) 5929 return exp.JoinHint(this=func_name.upper(), expressions=args) 5930 5931 def _parse_substring(self) -> exp.Substring: 5932 # Postgres supports the form: substring(string [from int] [for int]) 5933 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5934 5935 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5936 5937 if self._match(TokenType.FROM): 5938 args.append(self._parse_bitwise()) 5939 if self._match(TokenType.FOR): 5940 if len(args) == 1: 5941 args.append(exp.Literal.number(1)) 5942 args.append(self._parse_bitwise()) 5943 5944 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5945 5946 def _parse_trim(self) -> exp.Trim: 5947 # https://www.w3resource.com/sql/character-functions/trim.php 5948 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5949 5950 position = None 5951 collation = None 5952 expression = None 5953 5954 if self._match_texts(self.TRIM_TYPES): 5955 position = self._prev.text.upper() 5956 5957 this = self._parse_bitwise() 5958 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5959 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5960 expression = self._parse_bitwise() 5961 5962 if invert_order: 5963 this, expression = expression, this 5964 5965 if self._match(TokenType.COLLATE): 5966 collation = self._parse_bitwise() 5967 5968 return self.expression( 5969 exp.Trim, this=this, position=position, expression=expression, collation=collation 5970 ) 5971 5972 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5973 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5974 5975 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5976 return self._parse_window(self._parse_id_var(), alias=True) 5977 5978 def _parse_respect_or_ignore_nulls( 5979 self, this: t.Optional[exp.Expression] 5980 ) -> t.Optional[exp.Expression]: 5981 if self._match_text_seq("IGNORE", "NULLS"): 
5982 return self.expression(exp.IgnoreNulls, this=this) 5983 if self._match_text_seq("RESPECT", "NULLS"): 5984 return self.expression(exp.RespectNulls, this=this) 5985 return this 5986 5987 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5988 if self._match(TokenType.HAVING): 5989 self._match_texts(("MAX", "MIN")) 5990 max = self._prev.text.upper() != "MIN" 5991 return self.expression( 5992 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5993 ) 5994 5995 return this 5996 5997 def _parse_window( 5998 self, this: t.Optional[exp.Expression], alias: bool = False 5999 ) -> t.Optional[exp.Expression]: 6000 func = this 6001 comments = func.comments if isinstance(func, exp.Expression) else None 6002 6003 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6004 self._match(TokenType.WHERE) 6005 this = self.expression( 6006 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6007 ) 6008 self._match_r_paren() 6009 6010 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6011 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6012 if self._match_text_seq("WITHIN", "GROUP"): 6013 order = self._parse_wrapped(self._parse_order) 6014 this = self.expression(exp.WithinGroup, this=this, expression=order) 6015 6016 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6017 # Some dialects choose to implement and some do not. 6018 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6019 6020 # There is some code above in _parse_lambda that handles 6021 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6022 6023 # The below changes handle 6024 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6025 6026 # Oracle allows both formats 6027 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6028 # and Snowflake chose to do the same for familiarity 6029 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6030 if isinstance(this, exp.AggFunc): 6031 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6032 6033 if ignore_respect and ignore_respect is not this: 6034 ignore_respect.replace(ignore_respect.this) 6035 this = self.expression(ignore_respect.__class__, this=this) 6036 6037 this = self._parse_respect_or_ignore_nulls(this) 6038 6039 # bigquery select from window x AS (partition by ...) 
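# e.g. SELECT LAST_VALUE(y) OVER w FROM t WINDOW w AS (PARTITION BY x ORDER BY y);
# with alias=True the identifier preceding AS is parsed as the window's name and no OVER keyword is consumed.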
6040 if alias: 6041 over = None 6042 self._match(TokenType.ALIAS) 6043 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6044 return this 6045 else: 6046 over = self._prev.text.upper() 6047 6048 if comments and isinstance(func, exp.Expression): 6049 func.pop_comments() 6050 6051 if not self._match(TokenType.L_PAREN): 6052 return self.expression( 6053 exp.Window, 6054 comments=comments, 6055 this=this, 6056 alias=self._parse_id_var(False), 6057 over=over, 6058 ) 6059 6060 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6061 6062 first = self._match(TokenType.FIRST) 6063 if self._match_text_seq("LAST"): 6064 first = False 6065 6066 partition, order = self._parse_partition_and_order() 6067 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6068 6069 if kind: 6070 self._match(TokenType.BETWEEN) 6071 start = self._parse_window_spec() 6072 self._match(TokenType.AND) 6073 end = self._parse_window_spec() 6074 6075 spec = self.expression( 6076 exp.WindowSpec, 6077 kind=kind, 6078 start=start["value"], 6079 start_side=start["side"], 6080 end=end["value"], 6081 end_side=end["side"], 6082 ) 6083 else: 6084 spec = None 6085 6086 self._match_r_paren() 6087 6088 window = self.expression( 6089 exp.Window, 6090 comments=comments, 6091 this=this, 6092 partition_by=partition, 6093 order=order, 6094 spec=spec, 6095 alias=window_alias, 6096 over=over, 6097 first=first, 6098 ) 6099 6100 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6101 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6102 return self._parse_window(window, alias=alias) 6103 6104 return window 6105 6106 def _parse_partition_and_order( 6107 self, 6108 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6109 return self._parse_partition_by(), self._parse_order() 6110 6111 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6112 self._match(TokenType.BETWEEN) 6113 6114 return { 6115 "value": ( 6116 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6117 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6118 or self._parse_bitwise() 6119 ), 6120 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6121 } 6122 6123 def _parse_alias( 6124 self, this: t.Optional[exp.Expression], explicit: bool = False 6125 ) -> t.Optional[exp.Expression]: 6126 any_token = self._match(TokenType.ALIAS) 6127 comments = self._prev_comments or [] 6128 6129 if explicit and not any_token: 6130 return this 6131 6132 if self._match(TokenType.L_PAREN): 6133 aliases = self.expression( 6134 exp.Aliases, 6135 comments=comments, 6136 this=this, 6137 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6138 ) 6139 self._match_r_paren(aliases) 6140 return aliases 6141 6142 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6143 self.STRING_ALIASES and self._parse_string_as_identifier() 6144 ) 6145 6146 if alias: 6147 comments.extend(alias.pop_comments()) 6148 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6149 column = this.this 6150 6151 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6152 if not this.comments and column and column.comments: 6153 this.comments = column.pop_comments() 6154 6155 return this 6156 6157 def _parse_id_var( 6158 self, 6159 any_token: bool = True, 6160 tokens: t.Optional[t.Collection[TokenType]] = None, 6161 ) -> t.Optional[exp.Expression]: 6162 expression = self._parse_identifier() 6163 if 
not expression and ( 6164 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6165 ): 6166 quoted = self._prev.token_type == TokenType.STRING 6167 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6168 6169 return expression 6170 6171 def _parse_string(self) -> t.Optional[exp.Expression]: 6172 if self._match_set(self.STRING_PARSERS): 6173 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6174 return self._parse_placeholder() 6175 6176 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6177 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6178 6179 def _parse_number(self) -> t.Optional[exp.Expression]: 6180 if self._match_set(self.NUMERIC_PARSERS): 6181 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6182 return self._parse_placeholder() 6183 6184 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6185 if self._match(TokenType.IDENTIFIER): 6186 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6187 return self._parse_placeholder() 6188 6189 def _parse_var( 6190 self, 6191 any_token: bool = False, 6192 tokens: t.Optional[t.Collection[TokenType]] = None, 6193 upper: bool = False, 6194 ) -> t.Optional[exp.Expression]: 6195 if ( 6196 (any_token and self._advance_any()) 6197 or self._match(TokenType.VAR) 6198 or (self._match_set(tokens) if tokens else False) 6199 ): 6200 return self.expression( 6201 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6202 ) 6203 return self._parse_placeholder() 6204 6205 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6206 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6207 self._advance() 6208 return self._prev 6209 return None 6210 6211 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6212 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6213 6214 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6215 return self._parse_primary() or self._parse_var(any_token=True) 6216 6217 def _parse_null(self) -> t.Optional[exp.Expression]: 6218 if self._match_set(self.NULL_TOKENS): 6219 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6220 return self._parse_placeholder() 6221 6222 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6223 if self._match(TokenType.TRUE): 6224 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6225 if self._match(TokenType.FALSE): 6226 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6227 return self._parse_placeholder() 6228 6229 def _parse_star(self) -> t.Optional[exp.Expression]: 6230 if self._match(TokenType.STAR): 6231 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6232 return self._parse_placeholder() 6233 6234 def _parse_parameter(self) -> exp.Parameter: 6235 this = self._parse_identifier() or self._parse_primary_or_var() 6236 return self.expression(exp.Parameter, this=this) 6237 6238 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6239 if self._match_set(self.PLACEHOLDER_PARSERS): 6240 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6241 if placeholder: 6242 return placeholder 6243 self._advance(-1) 6244 return None 6245 6246 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6247 if not self._match_texts(keywords): 6248 return None 6249 if self._match(TokenType.L_PAREN, 
advance=False): 6250 return self._parse_wrapped_csv(self._parse_expression) 6251 6252 expression = self._parse_expression() 6253 return [expression] if expression else None 6254 6255 def _parse_csv( 6256 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6257 ) -> t.List[exp.Expression]: 6258 parse_result = parse_method() 6259 items = [parse_result] if parse_result is not None else [] 6260 6261 while self._match(sep): 6262 self._add_comments(parse_result) 6263 parse_result = parse_method() 6264 if parse_result is not None: 6265 items.append(parse_result) 6266 6267 return items 6268 6269 def _parse_tokens( 6270 self, parse_method: t.Callable, expressions: t.Dict 6271 ) -> t.Optional[exp.Expression]: 6272 this = parse_method() 6273 6274 while self._match_set(expressions): 6275 this = self.expression( 6276 expressions[self._prev.token_type], 6277 this=this, 6278 comments=self._prev_comments, 6279 expression=parse_method(), 6280 ) 6281 6282 return this 6283 6284 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6285 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6286 6287 def _parse_wrapped_csv( 6288 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6289 ) -> t.List[exp.Expression]: 6290 return self._parse_wrapped( 6291 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6292 ) 6293 6294 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6295 wrapped = self._match(TokenType.L_PAREN) 6296 if not wrapped and not optional: 6297 self.raise_error("Expecting (") 6298 parse_result = parse_method() 6299 if wrapped: 6300 self._match_r_paren() 6301 return parse_result 6302 6303 def _parse_expressions(self) -> t.List[exp.Expression]: 6304 return self._parse_csv(self._parse_expression) 6305 6306 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6307 return self._parse_select() or self._parse_set_operations( 6308 self._parse_expression() if alias else self._parse_assignment() 6309 ) 6310 6311 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6312 return self._parse_query_modifiers( 6313 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6314 ) 6315 6316 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6317 this = None 6318 if self._match_texts(self.TRANSACTION_KIND): 6319 this = self._prev.text 6320 6321 self._match_texts(("TRANSACTION", "WORK")) 6322 6323 modes = [] 6324 while True: 6325 mode = [] 6326 while self._match(TokenType.VAR): 6327 mode.append(self._prev.text) 6328 6329 if mode: 6330 modes.append(" ".join(mode)) 6331 if not self._match(TokenType.COMMA): 6332 break 6333 6334 return self.expression(exp.Transaction, this=this, modes=modes) 6335 6336 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6337 chain = None 6338 savepoint = None 6339 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6340 6341 self._match_texts(("TRANSACTION", "WORK")) 6342 6343 if self._match_text_seq("TO"): 6344 self._match_text_seq("SAVEPOINT") 6345 savepoint = self._parse_id_var() 6346 6347 if self._match(TokenType.AND): 6348 chain = not self._match_text_seq("NO") 6349 self._match_text_seq("CHAIN") 6350 6351 if is_rollback: 6352 return self.expression(exp.Rollback, savepoint=savepoint) 6353 6354 return self.expression(exp.Commit, chain=chain) 6355 6356 def _parse_refresh(self) -> exp.Refresh: 6357 self._match(TokenType.TABLE) 6358 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6359 6360 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6361 if not self._match_text_seq("ADD"): 6362 return None 6363 6364 self._match(TokenType.COLUMN) 6365 exists_column = self._parse_exists(not_=True) 6366 expression = self._parse_field_def() 6367 6368 if expression: 6369 expression.set("exists", exists_column) 6370 6371 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6372 if self._match_texts(("FIRST", "AFTER")): 6373 position = self._prev.text 6374 column_position = self.expression( 6375 exp.ColumnPosition, this=self._parse_column(), position=position 6376 ) 6377 expression.set("position", column_position) 6378 6379 return expression 6380 6381 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6382 drop = self._match(TokenType.DROP) and self._parse_drop() 6383 if drop and not isinstance(drop, exp.Command): 6384 drop.set("kind", drop.args.get("kind", "COLUMN")) 6385 return drop 6386 6387 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6388 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6389 return self.expression( 6390 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6391 ) 6392 6393 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6394 index = self._index - 1 6395 6396 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6397 return self._parse_csv( 6398 lambda: self.expression( 6399 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6400 ) 6401 ) 6402 6403 self._retreat(index) 6404 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6405 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6406 6407 if self._match_text_seq("ADD", "COLUMNS"): 6408 schema = self._parse_schema() 6409 if schema: 6410 return [schema] 6411 return [] 6412 6413 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6414 6415 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6416 if self._match_texts(self.ALTER_ALTER_PARSERS): 6417 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6418 6419 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6420 # keyword after ALTER we default to parsing this statement 6421 self._match(TokenType.COLUMN) 6422 column = self._parse_field(any_token=True) 6423 6424 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6425 return self.expression(exp.AlterColumn, this=column, drop=True) 6426 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6427 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6428 if self._match(TokenType.COMMENT): 6429 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6430 if self._match_text_seq("DROP", "NOT", "NULL"): 6431 return self.expression( 6432 exp.AlterColumn, 6433 this=column, 6434 drop=True, 6435 allow_null=True, 6436 ) 6437 if self._match_text_seq("SET", "NOT", "NULL"): 6438 return self.expression( 6439 exp.AlterColumn, 6440 this=column, 6441 allow_null=False, 6442 ) 6443 self._match_text_seq("SET", "DATA") 6444 self._match_text_seq("TYPE") 6445 return self.expression( 6446 exp.AlterColumn, 6447 this=column, 6448 dtype=self._parse_types(), 6449 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6450 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6451 ) 6452 6453 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6454 if self._match_texts(("ALL", "EVEN", "AUTO")): 6455 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6456 6457 self._match_text_seq("KEY", "DISTKEY") 6458 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6459 6460 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6461 if compound: 6462 self._match_text_seq("SORTKEY") 6463 6464 if self._match(TokenType.L_PAREN, advance=False): 6465 return self.expression( 6466 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6467 ) 6468 6469 self._match_texts(("AUTO", "NONE")) 6470 return self.expression( 6471 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6472 ) 6473 6474 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6475 index = self._index - 1 6476 6477 partition_exists = self._parse_exists() 6478 if self._match(TokenType.PARTITION, advance=False): 6479 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6480 6481 self._retreat(index) 6482 return self._parse_csv(self._parse_drop_column) 6483 6484 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6485 if self._match(TokenType.COLUMN): 6486 exists = self._parse_exists() 6487 old_column = self._parse_column() 6488 to = self._match_text_seq("TO") 6489 new_column = self._parse_column() 6490 6491 if old_column is None or to is None or new_column is None: 6492 return None 6493 6494 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6495 6496 self._match_text_seq("TO") 6497 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6498 6499 def _parse_alter_table_set(self) -> exp.AlterSet: 6500 alter_set = self.expression(exp.AlterSet) 6501 6502 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6503 "TABLE", "PROPERTIES" 6504 ): 6505 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6506 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6507 alter_set.set("expressions", [self._parse_assignment()]) 6508 elif self._match_texts(("LOGGED", "UNLOGGED")): 6509 alter_set.set("option", exp.var(self._prev.text.upper())) 6510 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6511 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6512 elif self._match_text_seq("LOCATION"): 6513 alter_set.set("location", self._parse_field()) 6514 elif self._match_text_seq("ACCESS", "METHOD"): 6515 alter_set.set("access_method", self._parse_field()) 6516 elif self._match_text_seq("TABLESPACE"): 6517 alter_set.set("tablespace", self._parse_field()) 6518 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6519 alter_set.set("file_format", [self._parse_field()]) 6520 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6521 alter_set.set("file_format", self._parse_wrapped_options()) 6522 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6523 alter_set.set("copy_options", self._parse_wrapped_options()) 6524 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6525 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6526 else: 6527 if self._match_text_seq("SERDE"): 6528 alter_set.set("serde", self._parse_field()) 6529 6530 alter_set.set("expressions", [self._parse_properties()]) 6531 6532 return 
alter_set 6533 6534 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6535 start = self._prev 6536 6537 if not self._match(TokenType.TABLE): 6538 return self._parse_as_command(start) 6539 6540 exists = self._parse_exists() 6541 only = self._match_text_seq("ONLY") 6542 this = self._parse_table(schema=True) 6543 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6544 6545 if self._next: 6546 self._advance() 6547 6548 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6549 if parser: 6550 actions = ensure_list(parser(self)) 6551 options = self._parse_csv(self._parse_property) 6552 6553 if not self._curr and actions: 6554 return self.expression( 6555 exp.AlterTable, 6556 this=this, 6557 exists=exists, 6558 actions=actions, 6559 only=only, 6560 options=options, 6561 cluster=cluster, 6562 ) 6563 6564 return self._parse_as_command(start) 6565 6566 def _parse_merge(self) -> exp.Merge: 6567 self._match(TokenType.INTO) 6568 target = self._parse_table() 6569 6570 if target and self._match(TokenType.ALIAS, advance=False): 6571 target.set("alias", self._parse_table_alias()) 6572 6573 self._match(TokenType.USING) 6574 using = self._parse_table() 6575 6576 self._match(TokenType.ON) 6577 on = self._parse_assignment() 6578 6579 return self.expression( 6580 exp.Merge, 6581 this=target, 6582 using=using, 6583 on=on, 6584 expressions=self._parse_when_matched(), 6585 ) 6586 6587 def _parse_when_matched(self) -> t.List[exp.When]: 6588 whens = [] 6589 6590 while self._match(TokenType.WHEN): 6591 matched = not self._match(TokenType.NOT) 6592 self._match_text_seq("MATCHED") 6593 source = ( 6594 False 6595 if self._match_text_seq("BY", "TARGET") 6596 else self._match_text_seq("BY", "SOURCE") 6597 ) 6598 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6599 6600 self._match(TokenType.THEN) 6601 6602 if self._match(TokenType.INSERT): 6603 _this = self._parse_star() 6604 if _this: 6605 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6606 else: 6607 then = self.expression( 6608 exp.Insert, 6609 this=self._parse_value(), 6610 expression=self._match_text_seq("VALUES") and self._parse_value(), 6611 ) 6612 elif self._match(TokenType.UPDATE): 6613 expressions = self._parse_star() 6614 if expressions: 6615 then = self.expression(exp.Update, expressions=expressions) 6616 else: 6617 then = self.expression( 6618 exp.Update, 6619 expressions=self._match(TokenType.SET) 6620 and self._parse_csv(self._parse_equality), 6621 ) 6622 elif self._match(TokenType.DELETE): 6623 then = self.expression(exp.Var, this=self._prev.text) 6624 else: 6625 then = None 6626 6627 whens.append( 6628 self.expression( 6629 exp.When, 6630 matched=matched, 6631 source=source, 6632 condition=condition, 6633 then=then, 6634 ) 6635 ) 6636 return whens 6637 6638 def _parse_show(self) -> t.Optional[exp.Expression]: 6639 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6640 if parser: 6641 return parser(self) 6642 return self._parse_as_command(self._prev) 6643 6644 def _parse_set_item_assignment( 6645 self, kind: t.Optional[str] = None 6646 ) -> t.Optional[exp.Expression]: 6647 index = self._index 6648 6649 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6650 return self._parse_set_transaction(global_=kind == "GLOBAL") 6651 6652 left = self._parse_primary() or self._parse_column() 6653 assignment_delimiter = self._match_texts(("=", "TO")) 6654 6655 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not 
assignment_delimiter): 6656 self._retreat(index) 6657 return None 6658 6659 right = self._parse_statement() or self._parse_id_var() 6660 if isinstance(right, (exp.Column, exp.Identifier)): 6661 right = exp.var(right.name) 6662 6663 this = self.expression(exp.EQ, this=left, expression=right) 6664 return self.expression(exp.SetItem, this=this, kind=kind) 6665 6666 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6667 self._match_text_seq("TRANSACTION") 6668 characteristics = self._parse_csv( 6669 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6670 ) 6671 return self.expression( 6672 exp.SetItem, 6673 expressions=characteristics, 6674 kind="TRANSACTION", 6675 **{"global": global_}, # type: ignore 6676 ) 6677 6678 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6679 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6680 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6681 6682 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6683 index = self._index 6684 set_ = self.expression( 6685 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6686 ) 6687 6688 if self._curr: 6689 self._retreat(index) 6690 return self._parse_as_command(self._prev) 6691 6692 return set_ 6693 6694 def _parse_var_from_options( 6695 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6696 ) -> t.Optional[exp.Var]: 6697 start = self._curr 6698 if not start: 6699 return None 6700 6701 option = start.text.upper() 6702 continuations = options.get(option) 6703 6704 index = self._index 6705 self._advance() 6706 for keywords in continuations or []: 6707 if isinstance(keywords, str): 6708 keywords = (keywords,) 6709 6710 if self._match_text_seq(*keywords): 6711 option = f"{option} {' '.join(keywords)}" 6712 break 6713 else: 6714 if continuations or continuations is None: 6715 if raise_unmatched: 6716 self.raise_error(f"Unknown option {option}") 6717 6718 self._retreat(index) 6719 return None 6720 6721 return exp.var(option) 6722 6723 def _parse_as_command(self, start: Token) -> exp.Command: 6724 while self._curr: 6725 self._advance() 6726 text = self._find_sql(start, self._prev) 6727 size = len(start.text) 6728 self._warn_unsupported() 6729 return exp.Command(this=text[:size], expression=text[size:]) 6730 6731 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6732 settings = [] 6733 6734 self._match_l_paren() 6735 kind = self._parse_id_var() 6736 6737 if self._match(TokenType.L_PAREN): 6738 while True: 6739 key = self._parse_id_var() 6740 value = self._parse_primary() 6741 6742 if not key and value is None: 6743 break 6744 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6745 self._match(TokenType.R_PAREN) 6746 6747 self._match_r_paren() 6748 6749 return self.expression( 6750 exp.DictProperty, 6751 this=this, 6752 kind=kind.this if kind else None, 6753 settings=settings, 6754 ) 6755 6756 def _parse_dict_range(self, this: str) -> exp.DictRange: 6757 self._match_l_paren() 6758 has_min = self._match_text_seq("MIN") 6759 if has_min: 6760 min = self._parse_var() or self._parse_primary() 6761 self._match_text_seq("MAX") 6762 max = self._parse_var() or self._parse_primary() 6763 else: 6764 max = self._parse_var() or self._parse_primary() 6765 min = exp.Literal.number(0) 6766 self._match_r_paren() 6767 return self.expression(exp.DictRange, this=this, min=min, max=max) 6768 6769 def _parse_comprehension( 6770 self, this: 
        t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
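The methods above close out the module source: COPY/TRUNCATE handling plus the low-level token matching helpers. As a minimal usage sketch (assuming only what this page shows: Dialect.get_or_raise, the dialect-bound tokenizer, and Parser.parse), a statement can be parsed end to end like so:

from sqlglot.dialects.dialect import Dialect

# Sketch: drive the Parser through a Dialect, which supplies the matching
# Tokenizer/Parser subclasses. "postgres" is just an example dialect name.
dialect = Dialect.get_or_raise("postgres")
sql = "TRUNCATE TABLE orders CASCADE"

tokens = dialect.tokenizer.tokenize(sql)
expressions = dialect.parser().parse(tokens, sql)  # one tree per statement

print(type(expressions[0]).__name__)  # expected: TruncateTable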
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            interval=seq_get(args, 2)
            or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
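An aside on the FUNCTIONS registry above: it maps upper-cased function names to builder callables that receive the parsed argument list (some builders, like CONCAT's, also receive the active dialect). A hedged sketch of how a subclass could register an extra builder; MyParser and MY_FUNC are illustrative names, not sqlglot API:

from sqlglot import exp
from sqlglot.parser import Parser


class MyParser(Parser):
    FUNCTIONS = {
        **Parser.FUNCTIONS,
        # The builder receives the already-parsed argument expressions and
        # returns the node that should represent the call.
        "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    }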
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
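These token sets are plain data: ID_VAR_TOKENS lists every keyword that may double as an identifier, and the narrower sets are derived from it with set arithmetic (TABLE_ALIAS_TOKENS, COMMENT_TABLE_ALIAS_TOKENS, and so on). A hypothetical subclass can re-derive a set the same way; MyParser is an illustrative name:

from sqlglot.parser import Parser
from sqlglot.tokens import TokenType


class MyParser(Parser):
    # For a dialect where FORMAT must never be read as a table alias,
    # shrink the inherited set instead of redefining it from scratch.
    TABLE_ALIAS_TOKENS = Parser.TABLE_ALIAS_TOKENS - {TokenType.FORMAT}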
    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
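The operator tables above (EQUALITY, COMPARISON, BITWISE, TERM, FACTOR, EXPONENT) drive a precedence-climbing expression parser: each tier consumes operators from its own dict and recurses into the next-tighter tier. EXPONENT is empty in the base class; a sketch of how a dialect-style subclass might populate it, assuming its tokenizer already emits TokenType.DSTAR for a ** operator:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import TokenType


class MyParser(Parser):
    # Route a ** token to exp.Pow at the exponent precedence tier.
    EXPONENT = {TokenType.DSTAR: exp.Pow}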
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }
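STATEMENT_PARSERS is the top-level dispatch table: _parse_statement (shown later on this page) looks up the first token of each statement here and calls the matching _parse_* method, falling back to exp.Command for unsupported syntax. A quick illustration through the public parse_one helper:

import sqlglot
from sqlglot import exp

# TokenType.TRUNCATE dispatches to _parse_truncate_table, TokenType.COPY
# to _parse_copy, and so on.
node = sqlglot.parse_one("TRUNCATE TABLE t")
print(isinstance(node, exp.TruncateTable))  # True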
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
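RANGE_PARSERS covers predicate operators that follow an already-parsed operand (BETWEEN, IN, IS, LIKE, ...). The binary_range_parser helper defined at module level wires most of them up and also threads through an optional ESCAPE clause. A small illustration:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT * FROM t WHERE x LIKE '10!%' ESCAPE '!'")
# The ESCAPE clause wraps the Like node, mirroring _parse_escape.
print(ast.find(exp.Escape) is not None)  # True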
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
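PROPERTY_PARSERS keys are matched against upcoming token text, so multi-word keys such as "CHARACTER SET", "PARTITIONED BY" and "ORDER BY" work too. A quick check; the dialect and the exact node names printed are only an example and may vary by version:

import sqlglot

ddl = sqlglot.parse_one(
    "CREATE TABLE t (a INT) ENGINE=MergeTree ORDER BY a", read="clickhouse"
)
# Expect something like an EngineProperty and an Order node in the list.
print([type(p).__name__ for p in ddl.args["properties"].expressions])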
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True
"=") for assignments 1258 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1259 1260 # Whether the TRIM function expects the characters to trim as its first argument 1261 TRIM_PATTERN_FIRST = False 1262 1263 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1264 STRING_ALIASES = False 1265 1266 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1267 MODIFIERS_ATTACHED_TO_SET_OP = True 1268 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1269 1270 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1271 NO_PAREN_IF_COMMANDS = True 1272 1273 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1274 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1275 1276 # Whether the `:` operator is used to extract a value from a VARIANT column 1277 COLON_IS_VARIANT_EXTRACT = False 1278 1279 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1280 # If this is True and '(' is not found, the keyword will be treated as an identifier 1281 VALUES_FOLLOWED_BY_PAREN = True 1282 1283 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1284 SUPPORTS_IMPLICIT_UNNEST = False 1285 1286 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1287 INTERVAL_SPANS = True 1288 1289 # Whether a PARTITION clause can follow a table reference 1290 SUPPORTS_PARTITION_SELECTION = False 1291 1292 __slots__ = ( 1293 "error_level", 1294 "error_message_context", 1295 "max_errors", 1296 "dialect", 1297 "sql", 1298 "errors", 1299 "_tokens", 1300 "_index", 1301 "_curr", 1302 "_next", 1303 "_prev", 1304 "_prev_comments", 1305 ) 1306 1307 # Autofilled 1308 SHOW_TRIE: t.Dict = {} 1309 SET_TRIE: t.Dict = {} 1310 1311 def __init__( 1312 self, 1313 error_level: t.Optional[ErrorLevel] = None, 1314 error_message_context: int = 100, 1315 max_errors: int = 3, 1316 dialect: DialectType = None, 1317 ): 1318 from sqlglot.dialects import Dialect 1319 1320 self.error_level = error_level or ErrorLevel.IMMEDIATE 1321 self.error_message_context = error_message_context 1322 self.max_errors = max_errors 1323 self.dialect = Dialect.get_or_raise(dialect) 1324 self.reset() 1325 1326 def reset(self): 1327 self.sql = "" 1328 self.errors = [] 1329 self._tokens = [] 1330 self._index = 0 1331 self._curr = None 1332 self._next = None 1333 self._prev = None 1334 self._prev_comments = None 1335 1336 def parse( 1337 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1338 ) -> t.List[t.Optional[exp.Expression]]: 1339 """ 1340 Parses a list of tokens and returns a list of syntax trees, one tree 1341 per parsed SQL statement. 1342 1343 Args: 1344 raw_tokens: The list of tokens. 1345 sql: The original SQL string, used to produce helpful debug messages. 1346 1347 Returns: 1348 The list of the produced syntax trees. 1349 """ 1350 return self._parse( 1351 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1352 ) 1353 1354 def parse_into( 1355 self, 1356 expression_types: exp.IntoType, 1357 raw_tokens: t.List[Token], 1358 sql: t.Optional[str] = None, 1359 ) -> t.List[t.Optional[exp.Expression]]: 1360 """ 1361 Parses a list of tokens into a given Expression type. If a collection of Expression 1362 types is given instead, this method will try to parse the token list into each one 1363 of them, stopping at the first for which the parsing succeeds. 1364 1365 Args: 1366 expression_types: The expression type(s) to try and parse the token list into. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
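parse_into is also reachable through the public helper sqlglot.parse_one(..., into=...), which is convenient when a fragment should be parsed as a specific node type registered in EXPRESSION_PARSERS:

import sqlglot
from sqlglot import exp

# exp.Table maps to _parse_table_parts in EXPRESSION_PARSERS.
table = sqlglot.parse_one("catalog.db.tbl", into=exp.Table)
print(table.catalog, table.db, table.name)  # catalog db tbl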
1480 """ 1481 instance = exp_class(**kwargs) 1482 instance.add_comments(comments) if comments else self._add_comments(instance) 1483 return self.validate_expression(instance) 1484 1485 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1486 if expression and self._prev_comments: 1487 expression.add_comments(self._prev_comments) 1488 self._prev_comments = None 1489 1490 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1491 """ 1492 Validates an Expression, making sure that all its mandatory arguments are set. 1493 1494 Args: 1495 expression: The expression to validate. 1496 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1497 1498 Returns: 1499 The validated expression. 1500 """ 1501 if self.error_level != ErrorLevel.IGNORE: 1502 for error_message in expression.error_messages(args): 1503 self.raise_error(error_message) 1504 1505 return expression 1506 1507 def _find_sql(self, start: Token, end: Token) -> str: 1508 return self.sql[start.start : end.end + 1] 1509 1510 def _is_connected(self) -> bool: 1511 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1512 1513 def _advance(self, times: int = 1) -> None: 1514 self._index += times 1515 self._curr = seq_get(self._tokens, self._index) 1516 self._next = seq_get(self._tokens, self._index + 1) 1517 1518 if self._index > 0: 1519 self._prev = self._tokens[self._index - 1] 1520 self._prev_comments = self._prev.comments 1521 else: 1522 self._prev = None 1523 self._prev_comments = None 1524 1525 def _retreat(self, index: int) -> None: 1526 if index != self._index: 1527 self._advance(index - self._index) 1528 1529 def _warn_unsupported(self) -> None: 1530 if len(self._tokens) <= 1: 1531 return 1532 1533 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1534 # interested in emitting a warning for the one being currently processed. 1535 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1536 1537 logger.warning( 1538 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1539 ) 1540 1541 def _parse_command(self) -> exp.Command: 1542 self._warn_unsupported() 1543 return self.expression( 1544 exp.Command, 1545 comments=self._prev_comments, 1546 this=self._prev.text.upper(), 1547 expression=self._parse_string(), 1548 ) 1549 1550 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1551 """ 1552 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
self._prev 1662 temporary = self._match(TokenType.TEMPORARY) 1663 materialized = self._match_text_seq("MATERIALIZED") 1664 1665 kind = self._match_set(self.CREATABLES) and self._prev.text 1666 if not kind: 1667 return self._parse_as_command(start) 1668 1669 if_exists = exists or self._parse_exists() 1670 table = self._parse_table_parts( 1671 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1672 ) 1673 1674 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1675 1676 if self._match(TokenType.L_PAREN, advance=False): 1677 expressions = self._parse_wrapped_csv(self._parse_types) 1678 else: 1679 expressions = None 1680 1681 return self.expression( 1682 exp.Drop, 1683 comments=start.comments, 1684 exists=if_exists, 1685 this=table, 1686 expressions=expressions, 1687 kind=kind.upper(), 1688 temporary=temporary, 1689 materialized=materialized, 1690 cascade=self._match_text_seq("CASCADE"), 1691 constraints=self._match_text_seq("CONSTRAINTS"), 1692 purge=self._match_text_seq("PURGE"), 1693 cluster=cluster, 1694 ) 1695 1696 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1697 return ( 1698 self._match_text_seq("IF") 1699 and (not not_ or self._match(TokenType.NOT)) 1700 and self._match(TokenType.EXISTS) 1701 ) 1702 1703 def _parse_create(self) -> exp.Create | exp.Command: 1704 # Note: this can't be None because we've matched a statement parser 1705 start = self._prev 1706 comments = self._prev_comments 1707 1708 replace = ( 1709 start.token_type == TokenType.REPLACE 1710 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1711 or self._match_pair(TokenType.OR, TokenType.ALTER) 1712 ) 1713 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1714 1715 unique = self._match(TokenType.UNIQUE) 1716 1717 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1718 clustered = True 1719 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1720 "COLUMNSTORE" 1721 ): 1722 clustered = False 1723 else: 1724 clustered = None 1725 1726 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1727 self._advance() 1728 1729 properties = None 1730 create_token = self._match_set(self.CREATABLES) and self._prev 1731 1732 if not create_token: 1733 # exp.Properties.Location.POST_CREATE 1734 properties = self._parse_properties() 1735 create_token = self._match_set(self.CREATABLES) and self._prev 1736 1737 if not properties or not create_token: 1738 return self._parse_as_command(start) 1739 1740 concurrently = self._match_text_seq("CONCURRENTLY") 1741 exists = self._parse_exists(not_=True) 1742 this = None 1743 expression: t.Optional[exp.Expression] = None 1744 indexes = None 1745 no_schema_binding = None 1746 begin = None 1747 end = None 1748 clone = None 1749 1750 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1751 nonlocal properties 1752 if properties and temp_props: 1753 properties.expressions.extend(temp_props.expressions) 1754 elif temp_props: 1755 properties = temp_props 1756 1757 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1758 this = self._parse_user_defined_function(kind=create_token.token_type) 1759 1760 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1761 extend_props(self._parse_properties()) 1762 1763 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1764 extend_props(self._parse_properties()) 1765 1766 if not expression: 1767 if self._match(TokenType.COMMAND): 1768 expression = 
self._parse_as_command(self._prev) 1769 else: 1770 begin = self._match(TokenType.BEGIN) 1771 return_ = self._match_text_seq("RETURN") 1772 1773 if self._match(TokenType.STRING, advance=False): 1774 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1775 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1776 expression = self._parse_string() 1777 extend_props(self._parse_properties()) 1778 else: 1779 expression = self._parse_statement() 1780 1781 end = self._match_text_seq("END") 1782 1783 if return_: 1784 expression = self.expression(exp.Return, this=expression) 1785 elif create_token.token_type == TokenType.INDEX: 1786 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1787 if not self._match(TokenType.ON): 1788 index = self._parse_id_var() 1789 anonymous = False 1790 else: 1791 index = None 1792 anonymous = True 1793 1794 this = self._parse_index(index=index, anonymous=anonymous) 1795 elif create_token.token_type in self.DB_CREATABLES: 1796 table_parts = self._parse_table_parts( 1797 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1798 ) 1799 1800 # exp.Properties.Location.POST_NAME 1801 self._match(TokenType.COMMA) 1802 extend_props(self._parse_properties(before=True)) 1803 1804 this = self._parse_schema(this=table_parts) 1805 1806 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1807 extend_props(self._parse_properties()) 1808 1809 self._match(TokenType.ALIAS) 1810 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1811 # exp.Properties.Location.POST_ALIAS 1812 extend_props(self._parse_properties()) 1813 1814 if create_token.token_type == TokenType.SEQUENCE: 1815 expression = self._parse_types() 1816 extend_props(self._parse_properties()) 1817 else: 1818 expression = self._parse_ddl_select() 1819 1820 if create_token.token_type == TokenType.TABLE: 1821 # exp.Properties.Location.POST_EXPRESSION 1822 extend_props(self._parse_properties()) 1823 1824 indexes = [] 1825 while True: 1826 index = self._parse_index() 1827 1828 # exp.Properties.Location.POST_INDEX 1829 extend_props(self._parse_properties()) 1830 if not index: 1831 break 1832 else: 1833 self._match(TokenType.COMMA) 1834 indexes.append(index) 1835 elif create_token.token_type == TokenType.VIEW: 1836 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1837 no_schema_binding = True 1838 1839 shallow = self._match_text_seq("SHALLOW") 1840 1841 if self._match_texts(self.CLONE_KEYWORDS): 1842 copy = self._prev.text.lower() == "copy" 1843 clone = self.expression( 1844 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1845 ) 1846 1847 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1848 return self._parse_as_command(start) 1849 1850 return self.expression( 1851 exp.Create, 1852 comments=comments, 1853 this=this, 1854 kind=create_token.text.upper(), 1855 replace=replace, 1856 refresh=refresh, 1857 unique=unique, 1858 expression=expression, 1859 exists=exists, 1860 properties=properties, 1861 indexes=indexes, 1862 no_schema_binding=no_schema_binding, 1863 begin=begin, 1864 end=end, 1865 clone=clone, 1866 concurrently=concurrently, 1867 clustered=clustered, 1868 ) 1869 1870 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1871 seq = exp.SequenceProperties() 1872 1873 options = [] 1874 index = self._index 1875 1876 while self._curr: 1877 self._match(TokenType.COMMA) 1878 if
self._match_text_seq("INCREMENT"): 1879 self._match_text_seq("BY") 1880 self._match_text_seq("=") 1881 seq.set("increment", self._parse_term()) 1882 elif self._match_text_seq("MINVALUE"): 1883 seq.set("minvalue", self._parse_term()) 1884 elif self._match_text_seq("MAXVALUE"): 1885 seq.set("maxvalue", self._parse_term()) 1886 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1887 self._match_text_seq("=") 1888 seq.set("start", self._parse_term()) 1889 elif self._match_text_seq("CACHE"): 1890 # T-SQL allows empty CACHE which is initialized dynamically 1891 seq.set("cache", self._parse_number() or True) 1892 elif self._match_text_seq("OWNED", "BY"): 1893 # "OWNED BY NONE" is the default 1894 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1895 else: 1896 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1897 if opt: 1898 options.append(opt) 1899 else: 1900 break 1901 1902 seq.set("options", options if options else None) 1903 return None if self._index == index else seq 1904 1905 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1906 # only used for teradata currently 1907 self._match(TokenType.COMMA) 1908 1909 kwargs = { 1910 "no": self._match_text_seq("NO"), 1911 "dual": self._match_text_seq("DUAL"), 1912 "before": self._match_text_seq("BEFORE"), 1913 "default": self._match_text_seq("DEFAULT"), 1914 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1915 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1916 "after": self._match_text_seq("AFTER"), 1917 "minimum": self._match_texts(("MIN", "MINIMUM")), 1918 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1919 } 1920 1921 if self._match_texts(self.PROPERTY_PARSERS): 1922 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1923 try: 1924 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1925 except TypeError: 1926 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1927 1928 return None 1929 1930 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1931 return self._parse_wrapped_csv(self._parse_property) 1932 1933 def _parse_property(self) -> t.Optional[exp.Expression]: 1934 if self._match_texts(self.PROPERTY_PARSERS): 1935 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1936 1937 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1938 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1939 1940 if self._match_text_seq("COMPOUND", "SORTKEY"): 1941 return self._parse_sortkey(compound=True) 1942 1943 if self._match_text_seq("SQL", "SECURITY"): 1944 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1945 1946 index = self._index 1947 key = self._parse_column() 1948 1949 if not self._match(TokenType.EQ): 1950 self._retreat(index) 1951 return self._parse_sequence_properties() 1952 1953 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1954 if isinstance(key, exp.Column): 1955 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1956 1957 value = self._parse_bitwise() or self._parse_var(any_token=True) 1958 1959 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1960 if isinstance(value, exp.Column): 1961 value = exp.var(value.name) 1962 1963 return self.expression(exp.Property, this=key, value=value) 1964 1965 def _parse_stored(self) -> exp.FileFormatProperty: 1966 self._match(TokenType.ALIAS) 1967 1968 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1969 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1970 1971 return self.expression( 1972 exp.FileFormatProperty, 1973 this=( 1974 self.expression( 1975 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1976 ) 1977 if input_format or output_format 1978 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1979 ), 1980 ) 1981 1982 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1983 field = self._parse_field() 1984 if isinstance(field, exp.Identifier) and not field.quoted: 1985 field = exp.var(field) 1986 1987 return field 1988 1989 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1990 self._match(TokenType.EQ) 1991 self._match(TokenType.ALIAS) 1992 1993 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1994 1995 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1996 properties = [] 1997 while True: 1998 if before: 1999 prop = self._parse_property_before() 2000 else: 2001 prop = self._parse_property() 2002 if not prop: 2003 break 2004 for p in ensure_list(prop): 2005 properties.append(p) 2006 2007 if properties: 2008 return self.expression(exp.Properties, expressions=properties) 2009 2010 return None 2011 2012 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2013 return self.expression( 2014 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2015 ) 2016 2017 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2018 if self._index >= 2: 2019 pre_volatile_token = self._tokens[self._index - 2] 2020 else: 2021 pre_volatile_token = None 2022 2023 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2024 return exp.VolatileProperty() 2025 2026 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2027 2028 def _parse_retention_period(self) -> exp.Var: 2029 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2030 number = self._parse_number() 2031 number_str = f"{number} " if number else "" 2032 unit = self._parse_var(any_token=True) 2033 return exp.var(f"{number_str}{unit}") 2034 2035 def _parse_system_versioning_property( 2036 self, with_: bool = False 2037 ) -> exp.WithSystemVersioningProperty: 2038 self._match(TokenType.EQ) 2039 prop = self.expression( 2040 exp.WithSystemVersioningProperty, 2041 **{ # type: ignore 2042 "on": True, 2043 "with": with_, 2044 }, 2045 ) 2046 2047 if self._match_text_seq("OFF"): 2048 prop.set("on", False) 2049 return prop 2050 2051 self._match(TokenType.ON) 2052 if self._match(TokenType.L_PAREN): 2053 while self._curr and not self._match(TokenType.R_PAREN): 2054 if self._match_text_seq("HISTORY_TABLE", "="): 2055 prop.set("this", self._parse_table_parts()) 2056 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2057 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2058 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2059 prop.set("retention_period", self._parse_retention_period()) 2060 2061 self._match(TokenType.COMMA) 2062 2063 return prop 2064 2065 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2066 self._match(TokenType.EQ) 2067 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2068 prop = 
self.expression(exp.DataDeletionProperty, on=on) 2069 2070 if self._match(TokenType.L_PAREN): 2071 while self._curr and not self._match(TokenType.R_PAREN): 2072 if self._match_text_seq("FILTER_COLUMN", "="): 2073 prop.set("filter_column", self._parse_column()) 2074 elif self._match_text_seq("RETENTION_PERIOD", "="): 2075 prop.set("retention_period", self._parse_retention_period()) 2076 2077 self._match(TokenType.COMMA) 2078 2079 return prop 2080 2081 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2082 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2083 prop = self._parse_system_versioning_property(with_=True) 2084 self._match_r_paren() 2085 return prop 2086 2087 if self._match(TokenType.L_PAREN, advance=False): 2088 return self._parse_wrapped_properties() 2089 2090 if self._match_text_seq("JOURNAL"): 2091 return self._parse_withjournaltable() 2092 2093 if self._match_texts(self.VIEW_ATTRIBUTES): 2094 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2095 2096 if self._match_text_seq("DATA"): 2097 return self._parse_withdata(no=False) 2098 elif self._match_text_seq("NO", "DATA"): 2099 return self._parse_withdata(no=True) 2100 2101 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2102 return self._parse_serde_properties(with_=True) 2103 2104 if self._match(TokenType.SCHEMA): 2105 return self.expression( 2106 exp.WithSchemaBindingProperty, 2107 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2108 ) 2109 2110 if not self._next: 2111 return None 2112 2113 return self._parse_withisolatedloading() 2114 2115 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2116 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2117 self._match(TokenType.EQ) 2118 2119 user = self._parse_id_var() 2120 self._match(TokenType.PARAMETER) 2121 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2122 2123 if not user or not host: 2124 return None 2125 2126 return exp.DefinerProperty(this=f"{user}@{host}") 2127 2128 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2129 self._match(TokenType.TABLE) 2130 self._match(TokenType.EQ) 2131 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2132 2133 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2134 return self.expression(exp.LogProperty, no=no) 2135 2136 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2137 return self.expression(exp.JournalProperty, **kwargs) 2138 2139 def _parse_checksum(self) -> exp.ChecksumProperty: 2140 self._match(TokenType.EQ) 2141 2142 on = None 2143 if self._match(TokenType.ON): 2144 on = True 2145 elif self._match_text_seq("OFF"): 2146 on = False 2147 2148 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2149 2150 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2151 return self.expression( 2152 exp.Cluster, 2153 expressions=( 2154 self._parse_wrapped_csv(self._parse_ordered) 2155 if wrapped 2156 else self._parse_csv(self._parse_ordered) 2157 ), 2158 ) 2159 2160 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2161 self._match_text_seq("BY") 2162 2163 self._match_l_paren() 2164 expressions = self._parse_csv(self._parse_column) 2165 self._match_r_paren() 2166 2167 if self._match_text_seq("SORTED", "BY"): 2168 self._match_l_paren() 2169 sorted_by = self._parse_csv(self._parse_ordered) 2170 self._match_r_paren() 2171 else: 2172 sorted_by = None 2173 2174 
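# Illustrative example (editorial sketch, not part of the upstream method body): _parse_clustered_by
# handles Hive bucketing specs such as CLUSTERED BY (c) SORTED BY (c) INTO 32 BUCKETS; the
# INTO <n> BUCKETS tail is consumed immediately below. A minimal look at the resulting node,
# assuming the `hive` dialect is available:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = "CREATE TABLE t (c INT) CLUSTERED BY (c) INTO 32 BUCKETS"
#     >>> prop = sqlglot.parse_one(ddl, read="hive").find(exp.ClusteredByProperty)
#     >>> prop.args["buckets"].this
#     '32'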
self._match(TokenType.INTO) 2175 buckets = self._parse_number() 2176 self._match_text_seq("BUCKETS") 2177 2178 return self.expression( 2179 exp.ClusteredByProperty, 2180 expressions=expressions, 2181 sorted_by=sorted_by, 2182 buckets=buckets, 2183 ) 2184 2185 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2186 if not self._match_text_seq("GRANTS"): 2187 self._retreat(self._index - 1) 2188 return None 2189 2190 return self.expression(exp.CopyGrantsProperty) 2191 2192 def _parse_freespace(self) -> exp.FreespaceProperty: 2193 self._match(TokenType.EQ) 2194 return self.expression( 2195 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2196 ) 2197 2198 def _parse_mergeblockratio( 2199 self, no: bool = False, default: bool = False 2200 ) -> exp.MergeBlockRatioProperty: 2201 if self._match(TokenType.EQ): 2202 return self.expression( 2203 exp.MergeBlockRatioProperty, 2204 this=self._parse_number(), 2205 percent=self._match(TokenType.PERCENT), 2206 ) 2207 2208 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2209 2210 def _parse_datablocksize( 2211 self, 2212 default: t.Optional[bool] = None, 2213 minimum: t.Optional[bool] = None, 2214 maximum: t.Optional[bool] = None, 2215 ) -> exp.DataBlocksizeProperty: 2216 self._match(TokenType.EQ) 2217 size = self._parse_number() 2218 2219 units = None 2220 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2221 units = self._prev.text 2222 2223 return self.expression( 2224 exp.DataBlocksizeProperty, 2225 size=size, 2226 units=units, 2227 default=default, 2228 minimum=minimum, 2229 maximum=maximum, 2230 ) 2231 2232 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2233 self._match(TokenType.EQ) 2234 always = self._match_text_seq("ALWAYS") 2235 manual = self._match_text_seq("MANUAL") 2236 never = self._match_text_seq("NEVER") 2237 default = self._match_text_seq("DEFAULT") 2238 2239 autotemp = None 2240 if self._match_text_seq("AUTOTEMP"): 2241 autotemp = self._parse_schema() 2242 2243 return self.expression( 2244 exp.BlockCompressionProperty, 2245 always=always, 2246 manual=manual, 2247 never=never, 2248 default=default, 2249 autotemp=autotemp, 2250 ) 2251 2252 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2253 index = self._index 2254 no = self._match_text_seq("NO") 2255 concurrent = self._match_text_seq("CONCURRENT") 2256 2257 if not self._match_text_seq("ISOLATED", "LOADING"): 2258 self._retreat(index) 2259 return None 2260 2261 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2262 return self.expression( 2263 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2264 ) 2265 2266 def _parse_locking(self) -> exp.LockingProperty: 2267 if self._match(TokenType.TABLE): 2268 kind = "TABLE" 2269 elif self._match(TokenType.VIEW): 2270 kind = "VIEW" 2271 elif self._match(TokenType.ROW): 2272 kind = "ROW" 2273 elif self._match_text_seq("DATABASE"): 2274 kind = "DATABASE" 2275 else: 2276 kind = None 2277 2278 if kind in ("DATABASE", "TABLE", "VIEW"): 2279 this = self._parse_table_parts() 2280 else: 2281 this = None 2282 2283 if self._match(TokenType.FOR): 2284 for_or_in = "FOR" 2285 elif self._match(TokenType.IN): 2286 for_or_in = "IN" 2287 else: 2288 for_or_in = None 2289 2290 if self._match_text_seq("ACCESS"): 2291 lock_type = "ACCESS" 2292 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2293 lock_type = "EXCLUSIVE" 2294 elif self._match_text_seq("SHARE"): 2295 
lock_type = "SHARE" 2296 elif self._match_text_seq("READ"): 2297 lock_type = "READ" 2298 elif self._match_text_seq("WRITE"): 2299 lock_type = "WRITE" 2300 elif self._match_text_seq("CHECKSUM"): 2301 lock_type = "CHECKSUM" 2302 else: 2303 lock_type = None 2304 2305 override = self._match_text_seq("OVERRIDE") 2306 2307 return self.expression( 2308 exp.LockingProperty, 2309 this=this, 2310 kind=kind, 2311 for_or_in=for_or_in, 2312 lock_type=lock_type, 2313 override=override, 2314 ) 2315 2316 def _parse_partition_by(self) -> t.List[exp.Expression]: 2317 if self._match(TokenType.PARTITION_BY): 2318 return self._parse_csv(self._parse_assignment) 2319 return [] 2320 2321 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2322 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2323 if self._match_text_seq("MINVALUE"): 2324 return exp.var("MINVALUE") 2325 if self._match_text_seq("MAXVALUE"): 2326 return exp.var("MAXVALUE") 2327 return self._parse_bitwise() 2328 2329 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2330 expression = None 2331 from_expressions = None 2332 to_expressions = None 2333 2334 if self._match(TokenType.IN): 2335 this = self._parse_wrapped_csv(self._parse_bitwise) 2336 elif self._match(TokenType.FROM): 2337 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2338 self._match_text_seq("TO") 2339 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2340 elif self._match_text_seq("WITH", "(", "MODULUS"): 2341 this = self._parse_number() 2342 self._match_text_seq(",", "REMAINDER") 2343 expression = self._parse_number() 2344 self._match_r_paren() 2345 else: 2346 self.raise_error("Failed to parse partition bound spec.") 2347 2348 return self.expression( 2349 exp.PartitionBoundSpec, 2350 this=this, 2351 expression=expression, 2352 from_expressions=from_expressions, 2353 to_expressions=to_expressions, 2354 ) 2355 2356 # https://www.postgresql.org/docs/current/sql-createtable.html 2357 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2358 if not self._match_text_seq("OF"): 2359 self._retreat(self._index - 1) 2360 return None 2361 2362 this = self._parse_table(schema=True) 2363 2364 if self._match(TokenType.DEFAULT): 2365 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2366 elif self._match_text_seq("FOR", "VALUES"): 2367 expression = self._parse_partition_bound_spec() 2368 else: 2369 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2370 2371 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2372 2373 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2374 self._match(TokenType.EQ) 2375 return self.expression( 2376 exp.PartitionedByProperty, 2377 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2378 ) 2379 2380 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2381 if self._match_text_seq("AND", "STATISTICS"): 2382 statistics = True 2383 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2384 statistics = False 2385 else: 2386 statistics = None 2387 2388 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2389 2390 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2391 if self._match_text_seq("SQL"): 2392 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2393 return None 2394 2395 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2396 if self._match_text_seq("SQL", 
"DATA"): 2397 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2398 return None 2399 2400 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2401 if self._match_text_seq("PRIMARY", "INDEX"): 2402 return exp.NoPrimaryIndexProperty() 2403 if self._match_text_seq("SQL"): 2404 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2405 return None 2406 2407 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2408 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2409 return exp.OnCommitProperty() 2410 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2411 return exp.OnCommitProperty(delete=True) 2412 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2413 2414 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2415 if self._match_text_seq("SQL", "DATA"): 2416 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2417 return None 2418 2419 def _parse_distkey(self) -> exp.DistKeyProperty: 2420 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2421 2422 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2423 table = self._parse_table(schema=True) 2424 2425 options = [] 2426 while self._match_texts(("INCLUDING", "EXCLUDING")): 2427 this = self._prev.text.upper() 2428 2429 id_var = self._parse_id_var() 2430 if not id_var: 2431 return None 2432 2433 options.append( 2434 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2435 ) 2436 2437 return self.expression(exp.LikeProperty, this=table, expressions=options) 2438 2439 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2440 return self.expression( 2441 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2442 ) 2443 2444 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2445 self._match(TokenType.EQ) 2446 return self.expression( 2447 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2448 ) 2449 2450 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2451 self._match_text_seq("WITH", "CONNECTION") 2452 return self.expression( 2453 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2454 ) 2455 2456 def _parse_returns(self) -> exp.ReturnsProperty: 2457 value: t.Optional[exp.Expression] 2458 null = None 2459 is_table = self._match(TokenType.TABLE) 2460 2461 if is_table: 2462 if self._match(TokenType.LT): 2463 value = self.expression( 2464 exp.Schema, 2465 this="TABLE", 2466 expressions=self._parse_csv(self._parse_struct_types), 2467 ) 2468 if not self._match(TokenType.GT): 2469 self.raise_error("Expecting >") 2470 else: 2471 value = self._parse_schema(exp.var("TABLE")) 2472 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2473 null = True 2474 value = None 2475 else: 2476 value = self._parse_types() 2477 2478 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2479 2480 def _parse_describe(self) -> exp.Describe: 2481 kind = self._match_set(self.CREATABLES) and self._prev.text 2482 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2483 if self._match(TokenType.DOT): 2484 style = None 2485 self._retreat(self._index - 2) 2486 this = self._parse_table(schema=True) 2487 properties = self._parse_properties() 2488 expressions = properties.expressions if properties else None 2489 return self.expression( 2490 
exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2491 ) 2492 2493 def _parse_insert(self) -> exp.Insert: 2494 comments = ensure_list(self._prev_comments) 2495 hint = self._parse_hint() 2496 overwrite = self._match(TokenType.OVERWRITE) 2497 ignore = self._match(TokenType.IGNORE) 2498 local = self._match_text_seq("LOCAL") 2499 alternative = None 2500 is_function = None 2501 2502 if self._match_text_seq("DIRECTORY"): 2503 this: t.Optional[exp.Expression] = self.expression( 2504 exp.Directory, 2505 this=self._parse_var_or_string(), 2506 local=local, 2507 row_format=self._parse_row_format(match_row=True), 2508 ) 2509 else: 2510 if self._match(TokenType.OR): 2511 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2512 2513 self._match(TokenType.INTO) 2514 comments += ensure_list(self._prev_comments) 2515 self._match(TokenType.TABLE) 2516 is_function = self._match(TokenType.FUNCTION) 2517 2518 this = ( 2519 self._parse_table(schema=True, parse_partition=True) 2520 if not is_function 2521 else self._parse_function() 2522 ) 2523 2524 returning = self._parse_returning() 2525 2526 return self.expression( 2527 exp.Insert, 2528 comments=comments, 2529 hint=hint, 2530 is_function=is_function, 2531 this=this, 2532 stored=self._match_text_seq("STORED") and self._parse_stored(), 2533 by_name=self._match_text_seq("BY", "NAME"), 2534 exists=self._parse_exists(), 2535 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2536 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2537 conflict=self._parse_on_conflict(), 2538 returning=returning or self._parse_returning(), 2539 overwrite=overwrite, 2540 alternative=alternative, 2541 ignore=ignore, 2542 ) 2543 2544 def _parse_kill(self) -> exp.Kill: 2545 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2546 2547 return self.expression( 2548 exp.Kill, 2549 this=self._parse_primary(), 2550 kind=kind, 2551 ) 2552 2553 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2554 conflict = self._match_text_seq("ON", "CONFLICT") 2555 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2556 2557 if not conflict and not duplicate: 2558 return None 2559 2560 conflict_keys = None 2561 constraint = None 2562 2563 if conflict: 2564 if self._match_text_seq("ON", "CONSTRAINT"): 2565 constraint = self._parse_id_var() 2566 elif self._match(TokenType.L_PAREN): 2567 conflict_keys = self._parse_csv(self._parse_id_var) 2568 self._match_r_paren() 2569 2570 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2571 if self._prev.token_type == TokenType.UPDATE: 2572 self._match(TokenType.SET) 2573 expressions = self._parse_csv(self._parse_equality) 2574 else: 2575 expressions = None 2576 2577 return self.expression( 2578 exp.OnConflict, 2579 duplicate=duplicate, 2580 expressions=expressions, 2581 action=action, 2582 conflict_keys=conflict_keys, 2583 constraint=constraint, 2584 ) 2585 2586 def _parse_returning(self) -> t.Optional[exp.Returning]: 2587 if not self._match(TokenType.RETURNING): 2588 return None 2589 return self.expression( 2590 exp.Returning, 2591 expressions=self._parse_csv(self._parse_expression), 2592 into=self._match(TokenType.INTO) and self._parse_table_part(), 2593 ) 2594 2595 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2596 if not self._match(TokenType.FORMAT): 2597 return None 2598 return self._parse_row_format() 2599 2600 def 
_parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2601 index = self._index 2602 with_ = with_ or self._match_text_seq("WITH") 2603 2604 if not self._match(TokenType.SERDE_PROPERTIES): 2605 self._retreat(index) 2606 return None 2607 return self.expression( 2608 exp.SerdeProperties, 2609 **{ # type: ignore 2610 "expressions": self._parse_wrapped_properties(), 2611 "with": with_, 2612 }, 2613 ) 2614 2615 def _parse_row_format( 2616 self, match_row: bool = False 2617 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2618 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2619 return None 2620 2621 if self._match_text_seq("SERDE"): 2622 this = self._parse_string() 2623 2624 serde_properties = self._parse_serde_properties() 2625 2626 return self.expression( 2627 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2628 ) 2629 2630 self._match_text_seq("DELIMITED") 2631 2632 kwargs = {} 2633 2634 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2635 kwargs["fields"] = self._parse_string() 2636 if self._match_text_seq("ESCAPED", "BY"): 2637 kwargs["escaped"] = self._parse_string() 2638 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2639 kwargs["collection_items"] = self._parse_string() 2640 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2641 kwargs["map_keys"] = self._parse_string() 2642 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2643 kwargs["lines"] = self._parse_string() 2644 if self._match_text_seq("NULL", "DEFINED", "AS"): 2645 kwargs["null"] = self._parse_string() 2646 2647 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2648 2649 def _parse_load(self) -> exp.LoadData | exp.Command: 2650 if self._match_text_seq("DATA"): 2651 local = self._match_text_seq("LOCAL") 2652 self._match_text_seq("INPATH") 2653 inpath = self._parse_string() 2654 overwrite = self._match(TokenType.OVERWRITE) 2655 self._match_pair(TokenType.INTO, TokenType.TABLE) 2656 2657 return self.expression( 2658 exp.LoadData, 2659 this=self._parse_table(schema=True), 2660 local=local, 2661 overwrite=overwrite, 2662 inpath=inpath, 2663 partition=self._parse_partition(), 2664 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2665 serde=self._match_text_seq("SERDE") and self._parse_string(), 2666 ) 2667 return self._parse_as_command(self._prev) 2668 2669 def _parse_delete(self) -> exp.Delete: 2670 # This handles MySQL's "Multiple-Table Syntax" 2671 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2672 tables = None 2673 comments = self._prev_comments 2674 if not self._match(TokenType.FROM, advance=False): 2675 tables = self._parse_csv(self._parse_table) or None 2676 2677 returning = self._parse_returning() 2678 2679 return self.expression( 2680 exp.Delete, 2681 comments=comments, 2682 tables=tables, 2683 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2684 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2685 where=self._parse_where(), 2686 returning=returning or self._parse_returning(), 2687 limit=self._parse_limit(), 2688 ) 2689 2690 def _parse_update(self) -> exp.Update: 2691 comments = self._prev_comments 2692 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2693 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2694 returning = self._parse_returning() 2695 return self.expression( 2696 exp.Update, 2697 
comments=comments, 2698 **{ # type: ignore 2699 "this": this, 2700 "expressions": expressions, 2701 "from": self._parse_from(joins=True), 2702 "where": self._parse_where(), 2703 "returning": returning or self._parse_returning(), 2704 "order": self._parse_order(), 2705 "limit": self._parse_limit(), 2706 }, 2707 ) 2708 2709 def _parse_uncache(self) -> exp.Uncache: 2710 if not self._match(TokenType.TABLE): 2711 self.raise_error("Expecting TABLE after UNCACHE") 2712 2713 return self.expression( 2714 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2715 ) 2716 2717 def _parse_cache(self) -> exp.Cache: 2718 lazy = self._match_text_seq("LAZY") 2719 self._match(TokenType.TABLE) 2720 table = self._parse_table(schema=True) 2721 2722 options = [] 2723 if self._match_text_seq("OPTIONS"): 2724 self._match_l_paren() 2725 k = self._parse_string() 2726 self._match(TokenType.EQ) 2727 v = self._parse_string() 2728 options = [k, v] 2729 self._match_r_paren() 2730 2731 self._match(TokenType.ALIAS) 2732 return self.expression( 2733 exp.Cache, 2734 this=table, 2735 lazy=lazy, 2736 options=options, 2737 expression=self._parse_select(nested=True), 2738 ) 2739 2740 def _parse_partition(self) -> t.Optional[exp.Partition]: 2741 if not self._match(TokenType.PARTITION): 2742 return None 2743 2744 return self.expression( 2745 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2746 ) 2747 2748 def _parse_value(self) -> t.Optional[exp.Tuple]: 2749 if self._match(TokenType.L_PAREN): 2750 expressions = self._parse_csv(self._parse_expression) 2751 self._match_r_paren() 2752 return self.expression(exp.Tuple, expressions=expressions) 2753 2754 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2755 expression = self._parse_expression() 2756 if expression: 2757 return self.expression(exp.Tuple, expressions=[expression]) 2758 return None 2759 2760 def _parse_projections(self) -> t.List[exp.Expression]: 2761 return self._parse_expressions() 2762 2763 def _parse_select( 2764 self, 2765 nested: bool = False, 2766 table: bool = False, 2767 parse_subquery_alias: bool = True, 2768 parse_set_operation: bool = True, 2769 ) -> t.Optional[exp.Expression]: 2770 cte = self._parse_with() 2771 2772 if cte: 2773 this = self._parse_statement() 2774 2775 if not this: 2776 self.raise_error("Failed to parse any statement following CTE") 2777 return cte 2778 2779 if "with" in this.arg_types: 2780 this.set("with", cte) 2781 else: 2782 self.raise_error(f"{this.key} does not support CTE") 2783 this = cte 2784 2785 return this 2786 2787 # duckdb supports a leading FROM clause, e.g. FROM x 2788 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2789 2790 if self._match(TokenType.SELECT): 2791 comments = self._prev_comments 2792 2793 hint = self._parse_hint() 2794 2795 if self._next and not self._next.token_type == TokenType.DOT: 2796 all_ = self._match(TokenType.ALL) 2797 distinct = self._match_set(self.DISTINCT_TOKENS) 2798 else: 2799 all_, distinct = None, None 2800 2801 kind = ( 2802 self._match(TokenType.ALIAS) 2803 and self._match_texts(("STRUCT", "VALUE")) 2804 and self._prev.text.upper() 2805 ) 2806 2807 if distinct: 2808 distinct = self.expression( 2809 exp.Distinct, 2810 on=self._parse_value() if self._match(TokenType.ON) else None, 2811 ) 2812 2813 if all_ and distinct: 2814 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2815 2816 limit = self._parse_limit(top=True) 2817 projections = self._parse_projections() 2818 2819
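# Illustrative example (editorial sketch, not part of the upstream method body): the limit is
# parsed *before* the projections so that dialects with a leading TOP clause flow through the
# same path as a trailing LIMIT. An indicative round trip, assuming the `tsql` and `duckdb`
# dialects are available:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT TOP 5 c FROM t", read="tsql").sql(dialect="duckdb")
#     'SELECT c FROM t LIMIT 5'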
this = self.expression( 2820 exp.Select, 2821 kind=kind, 2822 hint=hint, 2823 distinct=distinct, 2824 expressions=projections, 2825 limit=limit, 2826 ) 2827 this.comments = comments 2828 2829 into = self._parse_into() 2830 if into: 2831 this.set("into", into) 2832 2833 if not from_: 2834 from_ = self._parse_from() 2835 2836 if from_: 2837 this.set("from", from_) 2838 2839 this = self._parse_query_modifiers(this) 2840 elif (table or nested) and self._match(TokenType.L_PAREN): 2841 if self._match(TokenType.PIVOT): 2842 this = self._parse_simplified_pivot() 2843 elif self._match(TokenType.FROM): 2844 this = exp.select("*").from_( 2845 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2846 ) 2847 else: 2848 this = ( 2849 self._parse_table() 2850 if table 2851 else self._parse_select(nested=True, parse_set_operation=False) 2852 ) 2853 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2854 2855 self._match_r_paren() 2856 2857 # We return early here so that the UNION isn't attached to the subquery by the 2858 # following call to _parse_set_operations, but instead becomes the parent node 2859 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2860 elif self._match(TokenType.VALUES, advance=False): 2861 this = self._parse_derived_table_values() 2862 elif from_: 2863 this = exp.select("*").from_(from_.this, copy=False) 2864 elif self._match(TokenType.SUMMARIZE): 2865 table = self._match(TokenType.TABLE) 2866 this = self._parse_select() or self._parse_string() or self._parse_table() 2867 return self.expression(exp.Summarize, this=this, table=table) 2868 elif self._match(TokenType.DESCRIBE): 2869 this = self._parse_describe() 2870 elif self._match_text_seq("STREAM"): 2871 this = self.expression(exp.Stream, this=self._parse_function()) 2872 else: 2873 this = None 2874 2875 return self._parse_set_operations(this) if parse_set_operation else this 2876 2877 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2878 if not skip_with_token and not self._match(TokenType.WITH): 2879 return None 2880 2881 comments = self._prev_comments 2882 recursive = self._match(TokenType.RECURSIVE) 2883 2884 expressions = [] 2885 while True: 2886 expressions.append(self._parse_cte()) 2887 2888 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2889 break 2890 else: 2891 self._match(TokenType.WITH) 2892 2893 return self.expression( 2894 exp.With, comments=comments, expressions=expressions, recursive=recursive 2895 ) 2896 2897 def _parse_cte(self) -> exp.CTE: 2898 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2899 if not alias or not alias.this: 2900 self.raise_error("Expected CTE to have alias") 2901 2902 self._match(TokenType.ALIAS) 2903 comments = self._prev_comments 2904 2905 if self._match_text_seq("NOT", "MATERIALIZED"): 2906 materialized = False 2907 elif self._match_text_seq("MATERIALIZED"): 2908 materialized = True 2909 else: 2910 materialized = None 2911 2912 return self.expression( 2913 exp.CTE, 2914 this=self._parse_wrapped(self._parse_statement), 2915 alias=alias, 2916 materialized=materialized, 2917 comments=comments, 2918 ) 2919 2920 def _parse_table_alias( 2921 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2922 ) -> t.Optional[exp.TableAlias]: 2923 any_token = self._match(TokenType.ALIAS) 2924 alias = ( 2925 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2926 or self._parse_string_as_identifier() 2927 ) 2928 2929 index = self._index 2930 if 
self._match(TokenType.L_PAREN): 2931 columns = self._parse_csv(self._parse_function_parameter) 2932 self._match_r_paren() if columns else self._retreat(index) 2933 else: 2934 columns = None 2935 2936 if not alias and not columns: 2937 return None 2938 2939 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2940 2941 # We bubble up comments from the Identifier to the TableAlias 2942 if isinstance(alias, exp.Identifier): 2943 table_alias.add_comments(alias.pop_comments()) 2944 2945 return table_alias 2946 2947 def _parse_subquery( 2948 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2949 ) -> t.Optional[exp.Subquery]: 2950 if not this: 2951 return None 2952 2953 return self.expression( 2954 exp.Subquery, 2955 this=this, 2956 pivots=self._parse_pivots(), 2957 alias=self._parse_table_alias() if parse_alias else None, 2958 ) 2959 2960 def _implicit_unnests_to_explicit(self, this: E) -> E: 2961 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2962 2963 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2964 for i, join in enumerate(this.args.get("joins") or []): 2965 table = join.this 2966 normalized_table = table.copy() 2967 normalized_table.meta["maybe_column"] = True 2968 normalized_table = _norm(normalized_table, dialect=self.dialect) 2969 2970 if isinstance(table, exp.Table) and not join.args.get("on"): 2971 if normalized_table.parts[0].name in refs: 2972 table_as_column = table.to_column() 2973 unnest = exp.Unnest(expressions=[table_as_column]) 2974 2975 # Table.to_column creates a parent Alias node that we want to convert to 2976 # a TableAlias and attach to the Unnest, so it matches the parser's output 2977 if isinstance(table.args.get("alias"), exp.TableAlias): 2978 table_as_column.replace(table_as_column.this) 2979 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2980 2981 table.replace(unnest) 2982 2983 refs.add(normalized_table.alias_or_name) 2984 2985 return this 2986 2987 def _parse_query_modifiers( 2988 self, this: t.Optional[exp.Expression] 2989 ) -> t.Optional[exp.Expression]: 2990 if isinstance(this, (exp.Query, exp.Table)): 2991 for join in self._parse_joins(): 2992 this.append("joins", join) 2993 for lateral in iter(self._parse_lateral, None): 2994 this.append("laterals", lateral) 2995 2996 while True: 2997 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2998 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2999 key, expression = parser(self) 3000 3001 if expression: 3002 this.set(key, expression) 3003 if key == "limit": 3004 offset = expression.args.pop("offset", None) 3005 3006 if offset: 3007 offset = exp.Offset(expression=offset) 3008 this.set("offset", offset) 3009 3010 limit_by_expressions = expression.expressions 3011 expression.set("expressions", None) 3012 offset.set("expressions", limit_by_expressions) 3013 continue 3014 break 3015 3016 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3017 this = self._implicit_unnests_to_explicit(this) 3018 3019 return this 3020 3021 def _parse_hint(self) -> t.Optional[exp.Hint]: 3022 if self._match(TokenType.HINT): 3023 hints = [] 3024 for hint in iter( 3025 lambda: self._parse_csv( 3026 lambda: self._parse_function() or self._parse_var(upper=True) 3027 ), 3028 [], 3029 ): 3030 hints.extend(hint) 3031 3032 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3033 self.raise_error("Expected */ after HINT") 3034 3035 return self.expression(exp.Hint, 
expressions=hints) 3036 3037 return None 3038 3039 def _parse_into(self) -> t.Optional[exp.Into]: 3040 if not self._match(TokenType.INTO): 3041 return None 3042 3043 temp = self._match(TokenType.TEMPORARY) 3044 unlogged = self._match_text_seq("UNLOGGED") 3045 self._match(TokenType.TABLE) 3046 3047 return self.expression( 3048 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3049 ) 3050 3051 def _parse_from( 3052 self, joins: bool = False, skip_from_token: bool = False 3053 ) -> t.Optional[exp.From]: 3054 if not skip_from_token and not self._match(TokenType.FROM): 3055 return None 3056 3057 return self.expression( 3058 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3059 ) 3060 3061 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3062 return self.expression( 3063 exp.MatchRecognizeMeasure, 3064 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3065 this=self._parse_expression(), 3066 ) 3067 3068 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3069 if not self._match(TokenType.MATCH_RECOGNIZE): 3070 return None 3071 3072 self._match_l_paren() 3073 3074 partition = self._parse_partition_by() 3075 order = self._parse_order() 3076 3077 measures = ( 3078 self._parse_csv(self._parse_match_recognize_measure) 3079 if self._match_text_seq("MEASURES") 3080 else None 3081 ) 3082 3083 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3084 rows = exp.var("ONE ROW PER MATCH") 3085 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3086 text = "ALL ROWS PER MATCH" 3087 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3088 text += " SHOW EMPTY MATCHES" 3089 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3090 text += " OMIT EMPTY MATCHES" 3091 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3092 text += " WITH UNMATCHED ROWS" 3093 rows = exp.var(text) 3094 else: 3095 rows = None 3096 3097 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3098 text = "AFTER MATCH SKIP" 3099 if self._match_text_seq("PAST", "LAST", "ROW"): 3100 text += " PAST LAST ROW" 3101 elif self._match_text_seq("TO", "NEXT", "ROW"): 3102 text += " TO NEXT ROW" 3103 elif self._match_text_seq("TO", "FIRST"): 3104 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3105 elif self._match_text_seq("TO", "LAST"): 3106 text += f" TO LAST {self._advance_any().text}" # type: ignore 3107 after = exp.var(text) 3108 else: 3109 after = None 3110 3111 if self._match_text_seq("PATTERN"): 3112 self._match_l_paren() 3113 3114 if not self._curr: 3115 self.raise_error("Expecting )", self._curr) 3116 3117 paren = 1 3118 start = self._curr 3119 3120 while self._curr and paren > 0: 3121 if self._curr.token_type == TokenType.L_PAREN: 3122 paren += 1 3123 if self._curr.token_type == TokenType.R_PAREN: 3124 paren -= 1 3125 3126 end = self._prev 3127 self._advance() 3128 3129 if paren > 0: 3130 self.raise_error("Expecting )", self._curr) 3131 3132 pattern = exp.var(self._find_sql(start, end)) 3133 else: 3134 pattern = None 3135 3136 define = ( 3137 self._parse_csv(self._parse_name_as_expression) 3138 if self._match_text_seq("DEFINE") 3139 else None 3140 ) 3141 3142 self._match_r_paren() 3143 3144 return self.expression( 3145 exp.MatchRecognize, 3146 partition_by=partition, 3147 order=order, 3148 measures=measures, 3149 rows=rows, 3150 after=after, 3151 pattern=pattern, 3152 define=define, 3153 alias=self._parse_table_alias(), 3154 ) 3155 3156 def _parse_lateral(self) -> 
t.Optional[exp.Lateral]: 3157 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3158 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3159 cross_apply = False 3160 3161 if cross_apply is not None: 3162 this = self._parse_select(table=True) 3163 view = None 3164 outer = None 3165 elif self._match(TokenType.LATERAL): 3166 this = self._parse_select(table=True) 3167 view = self._match(TokenType.VIEW) 3168 outer = self._match(TokenType.OUTER) 3169 else: 3170 return None 3171 3172 if not this: 3173 this = ( 3174 self._parse_unnest() 3175 or self._parse_function() 3176 or self._parse_id_var(any_token=False) 3177 ) 3178 3179 while self._match(TokenType.DOT): 3180 this = exp.Dot( 3181 this=this, 3182 expression=self._parse_function() or self._parse_id_var(any_token=False), 3183 ) 3184 3185 if view: 3186 table = self._parse_id_var(any_token=False) 3187 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3188 table_alias: t.Optional[exp.TableAlias] = self.expression( 3189 exp.TableAlias, this=table, columns=columns 3190 ) 3191 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3192 # We move the alias from the lateral's child node to the lateral itself 3193 table_alias = this.args["alias"].pop() 3194 else: 3195 table_alias = self._parse_table_alias() 3196 3197 return self.expression( 3198 exp.Lateral, 3199 this=this, 3200 view=view, 3201 outer=outer, 3202 alias=table_alias, 3203 cross_apply=cross_apply, 3204 ) 3205 3206 def _parse_join_parts( 3207 self, 3208 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3209 return ( 3210 self._match_set(self.JOIN_METHODS) and self._prev, 3211 self._match_set(self.JOIN_SIDES) and self._prev, 3212 self._match_set(self.JOIN_KINDS) and self._prev, 3213 ) 3214 3215 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3216 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3217 this = self._parse_column() 3218 if isinstance(this, exp.Column): 3219 return this.this 3220 return this 3221 3222 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3223 3224 def _parse_join( 3225 self, skip_join_token: bool = False, parse_bracket: bool = False 3226 ) -> t.Optional[exp.Join]: 3227 if self._match(TokenType.COMMA): 3228 return self.expression(exp.Join, this=self._parse_table()) 3229 3230 index = self._index 3231 method, side, kind = self._parse_join_parts() 3232 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3233 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3234 3235 if not skip_join_token and not join: 3236 self._retreat(index) 3237 kind = None 3238 method = None 3239 side = None 3240 3241 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3242 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3243 3244 if not skip_join_token and not join and not outer_apply and not cross_apply: 3245 return None 3246 3247 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3248 3249 if method: 3250 kwargs["method"] = method.text 3251 if side: 3252 kwargs["side"] = side.text 3253 if kind: 3254 kwargs["kind"] = kind.text 3255 if hint: 3256 kwargs["hint"] = hint 3257 3258 if self._match(TokenType.MATCH_CONDITION): 3259 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3260 3261 if self._match(TokenType.ON): 3262 kwargs["on"] = self._parse_assignment() 3263 elif 
self._match(TokenType.USING): 3264 kwargs["using"] = self._parse_using_identifiers() 3265 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3266 kind and kind.token_type == TokenType.CROSS 3267 ): 3268 index = self._index 3269 joins: t.Optional[list] = list(self._parse_joins()) 3270 3271 if joins and self._match(TokenType.ON): 3272 kwargs["on"] = self._parse_assignment() 3273 elif joins and self._match(TokenType.USING): 3274 kwargs["using"] = self._parse_using_identifiers() 3275 else: 3276 joins = None 3277 self._retreat(index) 3278 3279 kwargs["this"].set("joins", joins if joins else None) 3280 3281 comments = [c for token in (method, side, kind) if token for c in token.comments] 3282 return self.expression(exp.Join, comments=comments, **kwargs) 3283 3284 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3285 this = self._parse_assignment() 3286 3287 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3288 return this 3289 3290 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3291 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3292 3293 return this 3294 3295 def _parse_index_params(self) -> exp.IndexParameters: 3296 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3297 3298 if self._match(TokenType.L_PAREN, advance=False): 3299 columns = self._parse_wrapped_csv(self._parse_with_operator) 3300 else: 3301 columns = None 3302 3303 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3304 partition_by = self._parse_partition_by() 3305 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3306 tablespace = ( 3307 self._parse_var(any_token=True) 3308 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3309 else None 3310 ) 3311 where = self._parse_where() 3312 3313 on = self._parse_field() if self._match(TokenType.ON) else None 3314 3315 return self.expression( 3316 exp.IndexParameters, 3317 using=using, 3318 columns=columns, 3319 include=include, 3320 partition_by=partition_by, 3321 where=where, 3322 with_storage=with_storage, 3323 tablespace=tablespace, 3324 on=on, 3325 ) 3326 3327 def _parse_index( 3328 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3329 ) -> t.Optional[exp.Index]: 3330 if index or anonymous: 3331 unique = None 3332 primary = None 3333 amp = None 3334 3335 self._match(TokenType.ON) 3336 self._match(TokenType.TABLE) # hive 3337 table = self._parse_table_parts(schema=True) 3338 else: 3339 unique = self._match(TokenType.UNIQUE) 3340 primary = self._match_text_seq("PRIMARY") 3341 amp = self._match_text_seq("AMP") 3342 3343 if not self._match(TokenType.INDEX): 3344 return None 3345 3346 index = self._parse_id_var() 3347 table = None 3348 3349 params = self._parse_index_params() 3350 3351 return self.expression( 3352 exp.Index, 3353 this=index, 3354 table=table, 3355 unique=unique, 3356 primary=primary, 3357 amp=amp, 3358 params=params, 3359 ) 3360 3361 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3362 hints: t.List[exp.Expression] = [] 3363 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3364 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3365 hints.append( 3366 self.expression( 3367 exp.WithTableHint, 3368 expressions=self._parse_csv( 3369 lambda: self._parse_function() or self._parse_var(any_token=True) 3370 ), 3371 ) 3372 ) 3373 self._match_r_paren() 3374 else: 3375 # 
https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3376 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3377 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3378 3379 self._match_set((TokenType.INDEX, TokenType.KEY)) 3380 if self._match(TokenType.FOR): 3381 hint.set("target", self._advance_any() and self._prev.text.upper()) 3382 3383 hint.set("expressions", self._parse_wrapped_id_vars()) 3384 hints.append(hint) 3385 3386 return hints or None 3387 3388 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3389 return ( 3390 (not schema and self._parse_function(optional_parens=False)) 3391 or self._parse_id_var(any_token=False) 3392 or self._parse_string_as_identifier() 3393 or self._parse_placeholder() 3394 ) 3395 3396 def _parse_table_parts( 3397 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3398 ) -> exp.Table: 3399 catalog = None 3400 db = None 3401 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3402 3403 while self._match(TokenType.DOT): 3404 if catalog: 3405 # This allows nesting the table in arbitrarily many dot expressions if needed 3406 table = self.expression( 3407 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3408 ) 3409 else: 3410 catalog = db 3411 db = table 3412 # "" used for tsql FROM a..b case 3413 table = self._parse_table_part(schema=schema) or "" 3414 3415 if ( 3416 wildcard 3417 and self._is_connected() 3418 and (isinstance(table, exp.Identifier) or not table) 3419 and self._match(TokenType.STAR) 3420 ): 3421 if isinstance(table, exp.Identifier): 3422 table.args["this"] += "*" 3423 else: 3424 table = exp.Identifier(this="*") 3425 3426 # We bubble up comments from the Identifier to the Table 3427 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3428 3429 if is_db_reference: 3430 catalog = db 3431 db = table 3432 table = None 3433 3434 if not table and not is_db_reference: 3435 self.raise_error(f"Expected table name but got {self._curr}") 3436 if not db and is_db_reference: 3437 self.raise_error(f"Expected database name but got {self._curr}") 3438 3439 table = self.expression( 3440 exp.Table, 3441 comments=comments, 3442 this=table, 3443 db=db, 3444 catalog=catalog, 3445 ) 3446 3447 changes = self._parse_changes() 3448 if changes: 3449 table.set("changes", changes) 3450 3451 at_before = self._parse_historical_data() 3452 if at_before: 3453 table.set("when", at_before) 3454 3455 pivots = self._parse_pivots() 3456 if pivots: 3457 table.set("pivots", pivots) 3458 3459 return table 3460 3461 def _parse_table( 3462 self, 3463 schema: bool = False, 3464 joins: bool = False, 3465 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3466 parse_bracket: bool = False, 3467 is_db_reference: bool = False, 3468 parse_partition: bool = False, 3469 ) -> t.Optional[exp.Expression]: 3470 lateral = self._parse_lateral() 3471 if lateral: 3472 return lateral 3473 3474 unnest = self._parse_unnest() 3475 if unnest: 3476 return unnest 3477 3478 values = self._parse_derived_table_values() 3479 if values: 3480 return values 3481 3482 subquery = self._parse_select(table=True) 3483 if subquery: 3484 if not subquery.args.get("pivots"): 3485 subquery.set("pivots", self._parse_pivots()) 3486 return subquery 3487 3488 bracket = parse_bracket and self._parse_bracket(None) 3489 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3490 3491 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 
3492 self._parse_table 3493 ) 3494 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3495 3496 only = self._match(TokenType.ONLY) 3497 3498 this = t.cast( 3499 exp.Expression, 3500 bracket 3501 or rows_from 3502 or self._parse_bracket( 3503 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3504 ), 3505 ) 3506 3507 if only: 3508 this.set("only", only) 3509 3510 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3511 self._match_text_seq("*") 3512 3513 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3514 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3515 this.set("partition", self._parse_partition()) 3516 3517 if schema: 3518 return self._parse_schema(this=this) 3519 3520 version = self._parse_version() 3521 3522 if version: 3523 this.set("version", version) 3524 3525 if self.dialect.ALIAS_POST_TABLESAMPLE: 3526 table_sample = self._parse_table_sample() 3527 3528 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3529 if alias: 3530 this.set("alias", alias) 3531 3532 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3533 return self.expression( 3534 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3535 ) 3536 3537 this.set("hints", self._parse_table_hints()) 3538 3539 if not this.args.get("pivots"): 3540 this.set("pivots", self._parse_pivots()) 3541 3542 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3543 table_sample = self._parse_table_sample() 3544 3545 if table_sample: 3546 table_sample.set("this", this) 3547 this = table_sample 3548 3549 if joins: 3550 for join in self._parse_joins(): 3551 this.append("joins", join) 3552 3553 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3554 this.set("ordinality", True) 3555 this.set("alias", self._parse_table_alias()) 3556 3557 return this 3558 3559 def _parse_version(self) -> t.Optional[exp.Version]: 3560 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3561 this = "TIMESTAMP" 3562 elif self._match(TokenType.VERSION_SNAPSHOT): 3563 this = "VERSION" 3564 else: 3565 return None 3566 3567 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3568 kind = self._prev.text.upper() 3569 start = self._parse_bitwise() 3570 self._match_texts(("TO", "AND")) 3571 end = self._parse_bitwise() 3572 expression: t.Optional[exp.Expression] = self.expression( 3573 exp.Tuple, expressions=[start, end] 3574 ) 3575 elif self._match_text_seq("CONTAINED", "IN"): 3576 kind = "CONTAINED IN" 3577 expression = self.expression( 3578 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3579 ) 3580 elif self._match(TokenType.ALL): 3581 kind = "ALL" 3582 expression = None 3583 else: 3584 self._match_text_seq("AS", "OF") 3585 kind = "AS OF" 3586 expression = self._parse_type() 3587 3588 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3589 3590 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3591 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3592 index = self._index 3593 historical_data = None 3594 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3595 this = self._prev.text.upper() 3596 kind = ( 3597 self._match(TokenType.L_PAREN) 3598 and self._match_texts(self.HISTORICAL_DATA_KIND) 3599 and self._prev.text.upper() 3600 ) 3601 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3602 3603 if expression: 3604 self._match_r_paren() 3605 historical_data = 
self.expression( 3606 exp.HistoricalData, this=this, kind=kind, expression=expression 3607 ) 3608 else: 3609 self._retreat(index) 3610 3611 return historical_data 3612 3613 def _parse_changes(self) -> t.Optional[exp.Changes]: 3614 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3615 return None 3616 3617 information = self._parse_var(any_token=True) 3618 self._match_r_paren() 3619 3620 return self.expression( 3621 exp.Changes, 3622 information=information, 3623 at_before=self._parse_historical_data(), 3624 end=self._parse_historical_data(), 3625 ) 3626 3627 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3628 if not self._match(TokenType.UNNEST): 3629 return None 3630 3631 expressions = self._parse_wrapped_csv(self._parse_equality) 3632 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3633 3634 alias = self._parse_table_alias() if with_alias else None 3635 3636 if alias: 3637 if self.dialect.UNNEST_COLUMN_ONLY: 3638 if alias.args.get("columns"): 3639 self.raise_error("Unexpected extra column alias in unnest.") 3640 3641 alias.set("columns", [alias.this]) 3642 alias.set("this", None) 3643 3644 columns = alias.args.get("columns") or [] 3645 if offset and len(expressions) < len(columns): 3646 offset = columns.pop() 3647 3648 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3649 self._match(TokenType.ALIAS) 3650 offset = self._parse_id_var( 3651 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3652 ) or exp.to_identifier("offset") 3653 3654 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3655 3656 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3657 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3658 if not is_derived and not self._match_text_seq("VALUES"): 3659 return None 3660 3661 expressions = self._parse_csv(self._parse_value) 3662 alias = self._parse_table_alias() 3663 3664 if is_derived: 3665 self._match_r_paren() 3666 3667 return self.expression( 3668 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3669 ) 3670 3671 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3672 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3673 as_modifier and self._match_text_seq("USING", "SAMPLE") 3674 ): 3675 return None 3676 3677 bucket_numerator = None 3678 bucket_denominator = None 3679 bucket_field = None 3680 percent = None 3681 size = None 3682 seed = None 3683 3684 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3685 matched_l_paren = self._match(TokenType.L_PAREN) 3686 3687 if self.TABLESAMPLE_CSV: 3688 num = None 3689 expressions = self._parse_csv(self._parse_primary) 3690 else: 3691 expressions = None 3692 num = ( 3693 self._parse_factor() 3694 if self._match(TokenType.NUMBER, advance=False) 3695 else self._parse_primary() or self._parse_placeholder() 3696 ) 3697 3698 if self._match_text_seq("BUCKET"): 3699 bucket_numerator = self._parse_number() 3700 self._match_text_seq("OUT", "OF") 3701 bucket_denominator = self._parse_number() 3702 self._match(TokenType.ON) 3703 bucket_field = self._parse_field() 3704 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3705 percent = num 3706 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3707 size = num 3708 else: 3709 percent = num 3710 3711 if matched_l_paren: 3712 self._match_r_paren() 3713 3714 if self._match(TokenType.L_PAREN): 3715 method =
self._parse_var(upper=True) 3716 seed = self._match(TokenType.COMMA) and self._parse_number() 3717 self._match_r_paren() 3718 elif self._match_texts(("SEED", "REPEATABLE")): 3719 seed = self._parse_wrapped(self._parse_number) 3720 3721 if not method and self.DEFAULT_SAMPLING_METHOD: 3722 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3723 3724 return self.expression( 3725 exp.TableSample, 3726 expressions=expressions, 3727 method=method, 3728 bucket_numerator=bucket_numerator, 3729 bucket_denominator=bucket_denominator, 3730 bucket_field=bucket_field, 3731 percent=percent, 3732 size=size, 3733 seed=seed, 3734 ) 3735 3736 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3737 return list(iter(self._parse_pivot, None)) or None 3738 3739 def _parse_joins(self) -> t.Iterator[exp.Join]: 3740 return iter(self._parse_join, None) 3741 3742 # https://duckdb.org/docs/sql/statements/pivot 3743 def _parse_simplified_pivot(self) -> exp.Pivot: 3744 def _parse_on() -> t.Optional[exp.Expression]: 3745 this = self._parse_bitwise() 3746 return self._parse_in(this) if self._match(TokenType.IN) else this 3747 3748 this = self._parse_table() 3749 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3750 using = self._match(TokenType.USING) and self._parse_csv( 3751 lambda: self._parse_alias(self._parse_function()) 3752 ) 3753 group = self._parse_group() 3754 return self.expression( 3755 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3756 ) 3757 3758 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3759 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3760 this = self._parse_select_or_expression() 3761 3762 self._match(TokenType.ALIAS) 3763 alias = self._parse_field() 3764 if alias: 3765 return self.expression(exp.PivotAlias, this=this, alias=alias) 3766 3767 return this 3768 3769 value = self._parse_column() 3770 3771 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3772 self.raise_error("Expecting IN (") 3773 3774 if self._match(TokenType.ANY): 3775 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3776 else: 3777 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3778 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3779 3780 self._match_r_paren() 3781 return expr 3782 3783 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3784 index = self._index 3785 include_nulls = None 3786 3787 if self._match(TokenType.PIVOT): 3788 unpivot = False 3789 elif self._match(TokenType.UNPIVOT): 3790 unpivot = True 3791 3792 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3793 if self._match_text_seq("INCLUDE", "NULLS"): 3794 include_nulls = True 3795 elif self._match_text_seq("EXCLUDE", "NULLS"): 3796 include_nulls = False 3797 else: 3798 return None 3799 3800 expressions = [] 3801 3802 if not self._match(TokenType.L_PAREN): 3803 self._retreat(index) 3804 return None 3805 3806 if unpivot: 3807 expressions = self._parse_csv(self._parse_column) 3808 else: 3809 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3810 3811 if not expressions: 3812 self.raise_error("Failed to parse PIVOT's aggregation list") 3813 3814 if not self._match(TokenType.FOR): 3815 self.raise_error("Expecting FOR") 3816 3817 field = self._parse_pivot_in() 3818 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3819 self._parse_bitwise 3820 ) 3821 3822 self._match_r_paren() 3823 3824 pivot 
= self.expression( 3825 exp.Pivot, 3826 expressions=expressions, 3827 field=field, 3828 unpivot=unpivot, 3829 include_nulls=include_nulls, 3830 default_on_null=default_on_null, 3831 ) 3832 3833 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3834 pivot.set("alias", self._parse_table_alias()) 3835 3836 if not unpivot: 3837 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3838 3839 columns: t.List[exp.Expression] = [] 3840 for fld in pivot.args["field"].expressions: 3841 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3842 for name in names: 3843 if self.PREFIXED_PIVOT_COLUMNS: 3844 name = f"{name}_{field_name}" if name else field_name 3845 else: 3846 name = f"{field_name}_{name}" if name else field_name 3847 3848 columns.append(exp.to_identifier(name)) 3849 3850 pivot.set("columns", columns) 3851 3852 return pivot 3853 3854 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3855 return [agg.alias for agg in aggregations] 3856 3857 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3858 if not skip_where_token and not self._match(TokenType.PREWHERE): 3859 return None 3860 3861 return self.expression( 3862 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3863 ) 3864 3865 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3866 if not skip_where_token and not self._match(TokenType.WHERE): 3867 return None 3868 3869 return self.expression( 3870 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3871 ) 3872 3873 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3874 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3875 return None 3876 3877 elements: t.Dict[str, t.Any] = defaultdict(list) 3878 3879 if self._match(TokenType.ALL): 3880 elements["all"] = True 3881 elif self._match(TokenType.DISTINCT): 3882 elements["all"] = False 3883 3884 while True: 3885 expressions = self._parse_csv( 3886 lambda: None 3887 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3888 else self._parse_assignment() 3889 ) 3890 if expressions: 3891 elements["expressions"].extend(expressions) 3892 3893 grouping_sets = self._parse_grouping_sets() 3894 if grouping_sets: 3895 elements["grouping_sets"].extend(grouping_sets) 3896 3897 rollup = None 3898 cube = None 3899 totals = None 3900 3901 index = self._index 3902 with_ = self._match(TokenType.WITH) 3903 if self._match(TokenType.ROLLUP): 3904 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3905 elements["rollup"].extend(ensure_list(rollup)) 3906 3907 if self._match(TokenType.CUBE): 3908 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3909 elements["cube"].extend(ensure_list(cube)) 3910 3911 if self._match_text_seq("TOTALS"): 3912 totals = True 3913 elements["totals"] = True # type: ignore 3914 3915 if not (grouping_sets or rollup or cube or totals): 3916 if with_: 3917 self._retreat(index) 3918 break 3919 3920 return self.expression(exp.Group, **elements) # type: ignore 3921 3922 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3923 if not self._match(TokenType.GROUPING_SETS): 3924 return None 3925 3926 return self._parse_wrapped_csv(self._parse_grouping_set) 3927 3928 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3929 if self._match(TokenType.L_PAREN): 3930 grouping_set = self._parse_csv(self._parse_column) 3931 
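# A parenthesized grouping set is parsed into an exp.Tuple here, while bare
# columns stay exp.Column nodes. A minimal sketch of the resulting tree,
# assuming a standard sqlglot install (arg layout may vary by version):
#
#   import sqlglot
#   from sqlglot import exp
#
#   ast = sqlglot.parse_one("SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), a)")
#   group = ast.args["group"]
#   assert isinstance(group, exp.Group)
#   sets = group.args["grouping_sets"]
#   assert isinstance(sets[0], exp.Tuple) and isinstance(sets[1], exp.Column)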
self._match_r_paren() 3932 return self.expression(exp.Tuple, expressions=grouping_set) 3933 3934 return self._parse_column() 3935 3936 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3937 if not skip_having_token and not self._match(TokenType.HAVING): 3938 return None 3939 return self.expression(exp.Having, this=self._parse_assignment()) 3940 3941 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3942 if not self._match(TokenType.QUALIFY): 3943 return None 3944 return self.expression(exp.Qualify, this=self._parse_assignment()) 3945 3946 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3947 if skip_start_token: 3948 start = None 3949 elif self._match(TokenType.START_WITH): 3950 start = self._parse_assignment() 3951 else: 3952 return None 3953 3954 self._match(TokenType.CONNECT_BY) 3955 nocycle = self._match_text_seq("NOCYCLE") 3956 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3957 exp.Prior, this=self._parse_bitwise() 3958 ) 3959 connect = self._parse_assignment() 3960 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3961 3962 if not start and self._match(TokenType.START_WITH): 3963 start = self._parse_assignment() 3964 3965 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3966 3967 def _parse_name_as_expression(self) -> exp.Alias: 3968 return self.expression( 3969 exp.Alias, 3970 alias=self._parse_id_var(any_token=True), 3971 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3972 ) 3973 3974 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3975 if self._match_text_seq("INTERPOLATE"): 3976 return self._parse_wrapped_csv(self._parse_name_as_expression) 3977 return None 3978 3979 def _parse_order( 3980 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3981 ) -> t.Optional[exp.Expression]: 3982 siblings = None 3983 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3984 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3985 return this 3986 3987 siblings = True 3988 3989 return self.expression( 3990 exp.Order, 3991 this=this, 3992 expressions=self._parse_csv(self._parse_ordered), 3993 interpolate=self._parse_interpolate(), 3994 siblings=siblings, 3995 ) 3996 3997 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3998 if not self._match(token): 3999 return None 4000 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4001 4002 def _parse_ordered( 4003 self, parse_method: t.Optional[t.Callable] = None 4004 ) -> t.Optional[exp.Ordered]: 4005 this = parse_method() if parse_method else self._parse_assignment() 4006 if not this: 4007 return None 4008 4009 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4010 this = exp.var("ALL") 4011 4012 asc = self._match(TokenType.ASC) 4013 desc = self._match(TokenType.DESC) or (asc and False) 4014 4015 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4016 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4017 4018 nulls_first = is_nulls_first or False 4019 explicitly_null_ordered = is_nulls_first or is_nulls_last 4020 4021 if ( 4022 not explicitly_null_ordered 4023 and ( 4024 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4025 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4026 ) 4027 and self.dialect.NULL_ORDERING != "nulls_are_last" 4028 ): 4029 nulls_first = True 4030 4031 if self._match_text_seq("WITH", "FILL"): 4032 with_fill = 
self.expression( 4033 exp.WithFill, 4034 **{ # type: ignore 4035 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4036 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4037 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4038 }, 4039 ) 4040 else: 4041 with_fill = None 4042 4043 return self.expression( 4044 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4045 ) 4046 4047 def _parse_limit( 4048 self, 4049 this: t.Optional[exp.Expression] = None, 4050 top: bool = False, 4051 skip_limit_token: bool = False, 4052 ) -> t.Optional[exp.Expression]: 4053 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4054 comments = self._prev_comments 4055 if top: 4056 limit_paren = self._match(TokenType.L_PAREN) 4057 expression = self._parse_term() if limit_paren else self._parse_number() 4058 4059 if limit_paren: 4060 self._match_r_paren() 4061 else: 4062 expression = self._parse_term() 4063 4064 if self._match(TokenType.COMMA): 4065 offset = expression 4066 expression = self._parse_term() 4067 else: 4068 offset = None 4069 4070 limit_exp = self.expression( 4071 exp.Limit, 4072 this=this, 4073 expression=expression, 4074 offset=offset, 4075 comments=comments, 4076 expressions=self._parse_limit_by(), 4077 ) 4078 4079 return limit_exp 4080 4081 if self._match(TokenType.FETCH): 4082 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4083 direction = self._prev.text.upper() if direction else "FIRST" 4084 4085 count = self._parse_field(tokens=self.FETCH_TOKENS) 4086 percent = self._match(TokenType.PERCENT) 4087 4088 self._match_set((TokenType.ROW, TokenType.ROWS)) 4089 4090 only = self._match_text_seq("ONLY") 4091 with_ties = self._match_text_seq("WITH", "TIES") 4092 4093 if only and with_ties: 4094 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4095 4096 return self.expression( 4097 exp.Fetch, 4098 direction=direction, 4099 count=count, 4100 percent=percent, 4101 with_ties=with_ties, 4102 ) 4103 4104 return this 4105 4106 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4107 if not self._match(TokenType.OFFSET): 4108 return this 4109 4110 count = self._parse_term() 4111 self._match_set((TokenType.ROW, TokenType.ROWS)) 4112 4113 return self.expression( 4114 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4115 ) 4116 4117 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4118 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4119 4120 def _parse_locks(self) -> t.List[exp.Lock]: 4121 locks = [] 4122 while True: 4123 if self._match_text_seq("FOR", "UPDATE"): 4124 update = True 4125 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4126 "LOCK", "IN", "SHARE", "MODE" 4127 ): 4128 update = False 4129 else: 4130 break 4131 4132 expressions = None 4133 if self._match_text_seq("OF"): 4134 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4135 4136 wait: t.Optional[bool | exp.Expression] = None 4137 if self._match_text_seq("NOWAIT"): 4138 wait = True 4139 elif self._match_text_seq("WAIT"): 4140 wait = self._parse_primary() 4141 elif self._match_text_seq("SKIP", "LOCKED"): 4142 wait = False 4143 4144 locks.append( 4145 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4146 ) 4147 4148 return locks 4149 4150 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4151 
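# Set operators chain left-associatively in the loop below, so e.g.
# "SELECT 1 UNION SELECT 2 UNION ALL SELECT 3" nests the first UNION inside
# the UNION ALL. A rough sketch (plain UNION implies distinct=True, UNION ALL
# sets distinct=False; assumes a standard sqlglot install):
#
#   import sqlglot
#
#   ast = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION ALL SELECT 3")
#   assert ast.args["distinct"] is False  # outermost node is the UNION ALL
#   assert ast.this.args["distinct"] is True  # inner plain UNION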
while this and self._match_set(self.SET_OPERATIONS): 4152 token_type = self._prev.token_type 4153 4154 if token_type == TokenType.UNION: 4155 operation: t.Type[exp.SetOperation] = exp.Union 4156 elif token_type == TokenType.EXCEPT: 4157 operation = exp.Except 4158 else: 4159 operation = exp.Intersect 4160 4161 comments = self._prev.comments 4162 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4163 by_name = self._match_text_seq("BY", "NAME") 4164 expression = self._parse_select(nested=True, parse_set_operation=False) 4165 4166 this = self.expression( 4167 operation, 4168 comments=comments, 4169 this=this, 4170 distinct=distinct, 4171 by_name=by_name, 4172 expression=expression, 4173 ) 4174 4175 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4176 expression = this.expression 4177 4178 if expression: 4179 for arg in self.SET_OP_MODIFIERS: 4180 expr = expression.args.get(arg) 4181 if expr: 4182 this.set(arg, expr.pop()) 4183 4184 return this 4185 4186 def _parse_expression(self) -> t.Optional[exp.Expression]: 4187 return self._parse_alias(self._parse_assignment()) 4188 4189 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4190 this = self._parse_disjunction() 4191 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4192 # This allows us to parse <non-identifier token> := <expr> 4193 this = exp.column( 4194 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4195 ) 4196 4197 while self._match_set(self.ASSIGNMENT): 4198 this = self.expression( 4199 self.ASSIGNMENT[self._prev.token_type], 4200 this=this, 4201 comments=self._prev_comments, 4202 expression=self._parse_assignment(), 4203 ) 4204 4205 return this 4206 4207 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4208 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4209 4210 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4211 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4212 4213 def _parse_equality(self) -> t.Optional[exp.Expression]: 4214 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4215 4216 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4217 return self._parse_tokens(self._parse_range, self.COMPARISON) 4218 4219 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4220 this = this or self._parse_bitwise() 4221 negate = self._match(TokenType.NOT) 4222 4223 if self._match_set(self.RANGE_PARSERS): 4224 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4225 if not expression: 4226 return this 4227 4228 this = expression 4229 elif self._match(TokenType.ISNULL): 4230 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4231 4232 # Postgres supports ISNULL and NOTNULL for conditions. 
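# For example, both forms normalize to IS NULL / NOT ... IS NULL on output
# (a sketch; assumes the postgres reader and the default writer, and exact
# output can vary across sqlglot versions):
#
#   import sqlglot
#
#   sql = sqlglot.parse_one("SELECT x ISNULL FROM t", read="postgres").sql()
#   assert sql == "SELECT x IS NULL FROM t"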
4233 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4234 if self._match(TokenType.NOTNULL): 4235 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4236 this = self.expression(exp.Not, this=this) 4237 4238 if negate: 4239 this = self.expression(exp.Not, this=this) 4240 4241 if self._match(TokenType.IS): 4242 this = self._parse_is(this) 4243 4244 return this 4245 4246 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4247 index = self._index - 1 4248 negate = self._match(TokenType.NOT) 4249 4250 if self._match_text_seq("DISTINCT", "FROM"): 4251 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4252 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4253 4254 expression = self._parse_null() or self._parse_boolean() 4255 if not expression: 4256 self._retreat(index) 4257 return None 4258 4259 this = self.expression(exp.Is, this=this, expression=expression) 4260 return self.expression(exp.Not, this=this) if negate else this 4261 4262 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4263 unnest = self._parse_unnest(with_alias=False) 4264 if unnest: 4265 this = self.expression(exp.In, this=this, unnest=unnest) 4266 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4267 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4268 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4269 4270 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4271 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4272 else: 4273 this = self.expression(exp.In, this=this, expressions=expressions) 4274 4275 if matched_l_paren: 4276 self._match_r_paren(this) 4277 elif not self._match(TokenType.R_BRACKET, expression=this): 4278 self.raise_error("Expecting ]") 4279 else: 4280 this = self.expression(exp.In, this=this, field=self._parse_field()) 4281 4282 return this 4283 4284 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4285 low = self._parse_bitwise() 4286 self._match(TokenType.AND) 4287 high = self._parse_bitwise() 4288 return self.expression(exp.Between, this=this, low=low, high=high) 4289 4290 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4291 if not self._match(TokenType.ESCAPE): 4292 return this 4293 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4294 4295 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4296 index = self._index 4297 4298 if not self._match(TokenType.INTERVAL) and match_interval: 4299 return None 4300 4301 if self._match(TokenType.STRING, advance=False): 4302 this = self._parse_primary() 4303 else: 4304 this = self._parse_term() 4305 4306 if not this or ( 4307 isinstance(this, exp.Column) 4308 and not this.table 4309 and not this.this.quoted 4310 and this.name.upper() == "IS" 4311 ): 4312 self._retreat(index) 4313 return None 4314 4315 unit = self._parse_function() or ( 4316 not self._match(TokenType.ALIAS, advance=False) 4317 and self._parse_var(any_token=True, upper=True) 4318 ) 4319 4320 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4321 # each INTERVAL expression into this canonical form so it's easy to transpile 4322 if this and this.is_number: 4323 this = exp.Literal.string(this.to_py()) 4324 elif this and this.is_string: 4325 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4326 if 
len(parts) == 1: 4327 if unit: 4328 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4329 self._retreat(self._index - 1) 4330 4331 this = exp.Literal.string(parts[0][0]) 4332 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4333 4334 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4335 unit = self.expression( 4336 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4337 ) 4338 4339 interval = self.expression(exp.Interval, this=this, unit=unit) 4340 4341 index = self._index 4342 self._match(TokenType.PLUS) 4343 4344 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4345 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4346 return self.expression( 4347 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4348 ) 4349 4350 self._retreat(index) 4351 return interval 4352 4353 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4354 this = self._parse_term() 4355 4356 while True: 4357 if self._match_set(self.BITWISE): 4358 this = self.expression( 4359 self.BITWISE[self._prev.token_type], 4360 this=this, 4361 expression=self._parse_term(), 4362 ) 4363 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4364 this = self.expression( 4365 exp.DPipe, 4366 this=this, 4367 expression=self._parse_term(), 4368 safe=not self.dialect.STRICT_STRING_CONCAT, 4369 ) 4370 elif self._match(TokenType.DQMARK): 4371 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4372 elif self._match_pair(TokenType.LT, TokenType.LT): 4373 this = self.expression( 4374 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4375 ) 4376 elif self._match_pair(TokenType.GT, TokenType.GT): 4377 this = self.expression( 4378 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4379 ) 4380 else: 4381 break 4382 4383 return this 4384 4385 def _parse_term(self) -> t.Optional[exp.Expression]: 4386 this = self._parse_factor() 4387 4388 while self._match_set(self.TERM): 4389 klass = self.TERM[self._prev.token_type] 4390 comments = self._prev_comments 4391 expression = self._parse_factor() 4392 4393 this = self.expression(klass, this=this, comments=comments, expression=expression) 4394 4395 if isinstance(this, exp.Collate): 4396 expr = this.expression 4397 4398 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4399 # fallback to Identifier / Var 4400 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4401 ident = expr.this 4402 if isinstance(ident, exp.Identifier): 4403 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4404 4405 return this 4406 4407 def _parse_factor(self) -> t.Optional[exp.Expression]: 4408 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4409 this = parse_method() 4410 4411 while self._match_set(self.FACTOR): 4412 klass = self.FACTOR[self._prev.token_type] 4413 comments = self._prev_comments 4414 expression = parse_method() 4415 4416 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4417 self._retreat(self._index - 1) 4418 return this 4419 4420 this = self.expression(klass, this=this, comments=comments, expression=expression) 4421 4422 if isinstance(this, exp.Div): 4423 this.args["typed"] = self.dialect.TYPED_DIVISION 4424 this.args["safe"] = self.dialect.SAFE_DIVISION 4425 4426 return this 4427 4428 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4429 return 
self._parse_tokens(self._parse_unary, self.EXPONENT) 4430 4431 def _parse_unary(self) -> t.Optional[exp.Expression]: 4432 if self._match_set(self.UNARY_PARSERS): 4433 return self.UNARY_PARSERS[self._prev.token_type](self) 4434 return self._parse_at_time_zone(self._parse_type()) 4435 4436 def _parse_type( 4437 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4438 ) -> t.Optional[exp.Expression]: 4439 interval = parse_interval and self._parse_interval() 4440 if interval: 4441 return interval 4442 4443 index = self._index 4444 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4445 4446 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4447 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4448 if isinstance(data_type, exp.Cast): 4449 # This constructor can contain ops directly after it, for instance struct unnesting: 4450 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4451 return self._parse_column_ops(data_type) 4452 4453 if data_type: 4454 index2 = self._index 4455 this = self._parse_primary() 4456 4457 if isinstance(this, exp.Literal): 4458 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4459 if parser: 4460 return parser(self, this, data_type) 4461 4462 return self.expression(exp.Cast, this=this, to=data_type) 4463 4464 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4465 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4466 # 4467 # If the index difference here is greater than 1, that means the parser itself must have 4468 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4469 # 4470 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4471 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4472 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4473 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4474 # 4475 # In these cases, we don't really want to return the converted type, but instead retreat 4476 # and try to parse a Column or Identifier in the section below.
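# The Snowflake conversion mentioned above can be observed directly through
# exp.DataType.build, which parses the type string with the dialect's parser
# (a sketch; output formatting may vary slightly across sqlglot versions):
#
#   from sqlglot import exp
#
#   dt = exp.DataType.build("DECIMAL", dialect="snowflake")
#   assert dt.sql("snowflake") == "DECIMAL(38, 0)"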
4477 if data_type.expressions and index2 - index > 1: 4478 self._retreat(index2) 4479 return self._parse_column_ops(data_type) 4480 4481 self._retreat(index) 4482 4483 if fallback_to_identifier: 4484 return self._parse_id_var() 4485 4486 this = self._parse_column() 4487 return this and self._parse_column_ops(this) 4488 4489 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4490 this = self._parse_type() 4491 if not this: 4492 return None 4493 4494 if isinstance(this, exp.Column) and not this.table: 4495 this = exp.var(this.name.upper()) 4496 4497 return self.expression( 4498 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4499 ) 4500 4501 def _parse_types( 4502 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4503 ) -> t.Optional[exp.Expression]: 4504 index = self._index 4505 4506 this: t.Optional[exp.Expression] = None 4507 prefix = self._match_text_seq("SYSUDTLIB", ".") 4508 4509 if not self._match_set(self.TYPE_TOKENS): 4510 identifier = allow_identifiers and self._parse_id_var( 4511 any_token=False, tokens=(TokenType.VAR,) 4512 ) 4513 if isinstance(identifier, exp.Identifier): 4514 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4515 4516 if len(tokens) != 1: 4517 self.raise_error("Unexpected identifier", self._prev) 4518 4519 if tokens[0].token_type in self.TYPE_TOKENS: 4520 self._prev = tokens[0] 4521 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4522 type_name = identifier.name 4523 4524 while self._match(TokenType.DOT): 4525 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4526 4527 this = exp.DataType.build(type_name, udt=True) 4528 else: 4529 self._retreat(self._index - 1) 4530 return None 4531 else: 4532 return None 4533 4534 type_token = self._prev.token_type 4535 4536 if type_token == TokenType.PSEUDO_TYPE: 4537 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4538 4539 if type_token == TokenType.OBJECT_IDENTIFIER: 4540 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4541 4542 # https://materialize.com/docs/sql/types/map/ 4543 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4544 key_type = self._parse_types( 4545 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4546 ) 4547 if not self._match(TokenType.FARROW): 4548 self._retreat(index) 4549 return None 4550 4551 value_type = self._parse_types( 4552 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4553 ) 4554 if not self._match(TokenType.R_BRACKET): 4555 self._retreat(index) 4556 return None 4557 4558 return exp.DataType( 4559 this=exp.DataType.Type.MAP, 4560 expressions=[key_type, value_type], 4561 nested=True, 4562 prefix=prefix, 4563 ) 4564 4565 nested = type_token in self.NESTED_TYPE_TOKENS 4566 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4567 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4568 expressions = None 4569 maybe_func = False 4570 4571 if self._match(TokenType.L_PAREN): 4572 if is_struct: 4573 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4574 elif nested: 4575 expressions = self._parse_csv( 4576 lambda: self._parse_types( 4577 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4578 ) 4579 ) 4580 elif type_token in self.ENUM_TYPE_TOKENS: 4581 expressions = self._parse_csv(self._parse_equality) 4582 elif is_aggregate: 4583 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4584 
any_token=False, tokens=(TokenType.VAR,) 4585 ) 4586 if not func_or_ident or not self._match(TokenType.COMMA): 4587 return None 4588 expressions = self._parse_csv( 4589 lambda: self._parse_types( 4590 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4591 ) 4592 ) 4593 expressions.insert(0, func_or_ident) 4594 else: 4595 expressions = self._parse_csv(self._parse_type_size) 4596 4597 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4598 if type_token == TokenType.VECTOR and len(expressions) == 2: 4599 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4600 4601 if not expressions or not self._match(TokenType.R_PAREN): 4602 self._retreat(index) 4603 return None 4604 4605 maybe_func = True 4606 4607 values: t.Optional[t.List[exp.Expression]] = None 4608 4609 if nested and self._match(TokenType.LT): 4610 if is_struct: 4611 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4612 else: 4613 expressions = self._parse_csv( 4614 lambda: self._parse_types( 4615 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4616 ) 4617 ) 4618 4619 if not self._match(TokenType.GT): 4620 self.raise_error("Expecting >") 4621 4622 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4623 values = self._parse_csv(self._parse_assignment) 4624 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4625 4626 if type_token in self.TIMESTAMPS: 4627 if self._match_text_seq("WITH", "TIME", "ZONE"): 4628 maybe_func = False 4629 tz_type = ( 4630 exp.DataType.Type.TIMETZ 4631 if type_token in self.TIMES 4632 else exp.DataType.Type.TIMESTAMPTZ 4633 ) 4634 this = exp.DataType(this=tz_type, expressions=expressions) 4635 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4636 maybe_func = False 4637 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4638 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4639 maybe_func = False 4640 elif type_token == TokenType.INTERVAL: 4641 unit = self._parse_var(upper=True) 4642 if unit: 4643 if self._match_text_seq("TO"): 4644 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4645 4646 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4647 else: 4648 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4649 4650 if maybe_func and check_func: 4651 index2 = self._index 4652 peek = self._parse_string() 4653 4654 if not peek: 4655 self._retreat(index) 4656 return None 4657 4658 self._retreat(index2) 4659 4660 if not this: 4661 if self._match_text_seq("UNSIGNED"): 4662 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4663 if not unsigned_type_token: 4664 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4665 4666 type_token = unsigned_type_token or type_token 4667 4668 this = exp.DataType( 4669 this=exp.DataType.Type[type_token.value], 4670 expressions=expressions, 4671 nested=nested, 4672 prefix=prefix, 4673 ) 4674 4675 # Empty arrays/structs are allowed 4676 if values is not None: 4677 cls = exp.Struct if is_struct else exp.Array 4678 this = exp.cast(cls(expressions=values), this, copy=False) 4679 4680 elif expressions: 4681 this.set("expressions", expressions) 4682 4683 # https://materialize.com/docs/sql/types/list/#type-name 4684 while self._match(TokenType.LIST): 4685 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4686 4687 index = self._index 4688 4689 # Postgres 
supports the INT ARRAY[3] syntax as a synonym for INT[3] 4690 matched_array = self._match(TokenType.ARRAY) 4691 4692 while self._curr: 4693 datatype_token = self._prev.token_type 4694 matched_l_bracket = self._match(TokenType.L_BRACKET) 4695 if not matched_l_bracket and not matched_array: 4696 break 4697 4698 matched_array = False 4699 values = self._parse_csv(self._parse_assignment) or None 4700 if ( 4701 values 4702 and not schema 4703 and ( 4704 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4705 ) 4706 ): 4707 # Retreating here means we should not parse the following values as part of the data type; 4708 # e.g. in DuckDB, ARRAY[1] is parsed as an exp.Array literal, in contrast to INT[x][y], which denotes a fixed-size array data type 4709 self._retreat(index) 4710 break 4711 4712 this = exp.DataType( 4713 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4714 ) 4715 self._match(TokenType.R_BRACKET) 4716 4717 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4718 converter = self.TYPE_CONVERTERS.get(this.this) 4719 if converter: 4720 this = converter(t.cast(exp.DataType, this)) 4721 4722 return this 4723 4724 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4725 index = self._index 4726 4727 if ( 4728 self._curr 4729 and self._next 4730 and self._curr.token_type in self.TYPE_TOKENS 4731 and self._next.token_type in self.TYPE_TOKENS 4732 ): 4733 # Takes care of special cases like `STRUCT<list ARRAY<...>>`, where the identifier is also a 4734 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4735 this = self._parse_id_var() 4736 else: 4737 this = ( 4738 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4739 or self._parse_id_var() 4740 ) 4741 4742 self._match(TokenType.COLON) 4743 4744 if ( 4745 type_required 4746 and not isinstance(this, exp.DataType) 4747 and not self._match_set(self.TYPE_TOKENS, advance=False) 4748 ): 4749 self._retreat(index) 4750 return self._parse_types() 4751 4752 return self._parse_column_def(this) 4753 4754 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4755 if not self._match_text_seq("AT", "TIME", "ZONE"): 4756 return this 4757 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4758 4759 def _parse_column(self) -> t.Optional[exp.Expression]: 4760 this = self._parse_column_reference() 4761 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4762 4763 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4764 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4765 4766 return column 4767 4768 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4769 this = self._parse_field() 4770 if ( 4771 not this 4772 and self._match(TokenType.VALUES, advance=False) 4773 and self.VALUES_FOLLOWED_BY_PAREN 4774 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4775 ): 4776 this = self._parse_id_var() 4777 4778 if isinstance(this, exp.Identifier): 4779 # We bubble up comments from the Identifier to the Column 4780 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4781 4782 return this 4783 4784 def _parse_colon_as_variant_extract( 4785 self, this: t.Optional[exp.Expression] 4786 ) -> t.Optional[exp.Expression]: 4787 casts = [] 4788 json_path = [] 4789 4790 while self._match(TokenType.COLON): 4791 start_index = self._index 4792
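# Sketch of the colon-extraction rewrite performed by this loop (assumes the
# snowflake reader; the exact tree shape may vary by sqlglot version):
#
#   import sqlglot
#   from sqlglot import exp
#
#   ast = sqlglot.parse_one("SELECT v:a.b::int FROM t", read="snowflake")
#   cast = ast.selects[0]
#   assert isinstance(cast, exp.Cast)  # the :: cast wraps the extraction...
#   assert cast.this.args.get("variant_extract")  # ...which is a JSONExtract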
4793 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4794 path = self._parse_column_ops( 4795 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4796 ) 4797 4798 # The cast :: operator has a lower precedence than the extraction operator :, so 4799 # we rearrange the AST appropriately to avoid casting the JSON path 4800 while isinstance(path, exp.Cast): 4801 casts.append(path.to) 4802 path = path.this 4803 4804 if casts: 4805 dcolon_offset = next( 4806 i 4807 for i, t in enumerate(self._tokens[start_index:]) 4808 if t.token_type == TokenType.DCOLON 4809 ) 4810 end_token = self._tokens[start_index + dcolon_offset - 1] 4811 else: 4812 end_token = self._prev 4813 4814 if path: 4815 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4816 4817 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4818 # Databricks transforms it back to the colon/dot notation 4819 if json_path: 4820 this = self.expression( 4821 exp.JSONExtract, 4822 this=this, 4823 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4824 variant_extract=True, 4825 ) 4826 4827 while casts: 4828 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4829 4830 return this 4831 4832 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4833 return self._parse_types() 4834 4835 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4836 this = self._parse_bracket(this) 4837 4838 while self._match_set(self.COLUMN_OPERATORS): 4839 op_token = self._prev.token_type 4840 op = self.COLUMN_OPERATORS.get(op_token) 4841 4842 if op_token == TokenType.DCOLON: 4843 field = self._parse_dcolon() 4844 if not field: 4845 self.raise_error("Expected type") 4846 elif op and self._curr: 4847 field = self._parse_column_reference() 4848 else: 4849 field = self._parse_field(any_token=True, anonymous_func=True) 4850 4851 if isinstance(field, exp.Func) and this: 4852 # bigquery allows function calls like x.y.count(...) 4853 # SAFE.SUBSTR(...) 
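# e.g. "SELECT SAFE.SUBSTR('foo', 1)" parses the SUBSTR call as the right-hand
# side of a Dot rather than as a column path (a sketch; assumes read="bigquery",
# and the printed shape may vary by version):
#
#   import sqlglot
#
#   ast = sqlglot.parse_one("SELECT SAFE.SUBSTR('foo', 1)", read="bigquery")
#   print(repr(ast.selects[0]))  # a Dot over the function call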
4854 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4855 this = exp.replace_tree( 4856 this, 4857 lambda n: ( 4858 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4859 if n.table 4860 else n.this 4861 ) 4862 if isinstance(n, exp.Column) 4863 else n, 4864 ) 4865 4866 if op: 4867 this = op(self, this, field) 4868 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4869 this = self.expression( 4870 exp.Column, 4871 this=field, 4872 table=this.this, 4873 db=this.args.get("table"), 4874 catalog=this.args.get("db"), 4875 ) 4876 else: 4877 this = self.expression(exp.Dot, this=this, expression=field) 4878 4879 this = self._parse_bracket(this) 4880 4881 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4882 4883 def _parse_primary(self) -> t.Optional[exp.Expression]: 4884 if self._match_set(self.PRIMARY_PARSERS): 4885 token_type = self._prev.token_type 4886 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4887 4888 if token_type == TokenType.STRING: 4889 expressions = [primary] 4890 while self._match(TokenType.STRING): 4891 expressions.append(exp.Literal.string(self._prev.text)) 4892 4893 if len(expressions) > 1: 4894 return self.expression(exp.Concat, expressions=expressions) 4895 4896 return primary 4897 4898 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4899 return exp.Literal.number(f"0.{self._prev.text}") 4900 4901 if self._match(TokenType.L_PAREN): 4902 comments = self._prev_comments 4903 query = self._parse_select() 4904 4905 if query: 4906 expressions = [query] 4907 else: 4908 expressions = self._parse_expressions() 4909 4910 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4911 4912 if not this and self._match(TokenType.R_PAREN, advance=False): 4913 this = self.expression(exp.Tuple) 4914 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4915 this = self._parse_subquery(this=this, parse_alias=False) 4916 elif isinstance(this, exp.Subquery): 4917 this = self._parse_subquery( 4918 this=self._parse_set_operations(this), parse_alias=False 4919 ) 4920 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4921 this = self.expression(exp.Tuple, expressions=expressions) 4922 else: 4923 this = self.expression(exp.Paren, this=this) 4924 4925 if this: 4926 this.add_comments(comments) 4927 4928 self._match_r_paren(expression=this) 4929 return this 4930 4931 return None 4932 4933 def _parse_field( 4934 self, 4935 any_token: bool = False, 4936 tokens: t.Optional[t.Collection[TokenType]] = None, 4937 anonymous_func: bool = False, 4938 ) -> t.Optional[exp.Expression]: 4939 if anonymous_func: 4940 field = ( 4941 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4942 or self._parse_primary() 4943 ) 4944 else: 4945 field = self._parse_primary() or self._parse_function( 4946 anonymous=anonymous_func, any_token=any_token 4947 ) 4948 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4949 4950 def _parse_function( 4951 self, 4952 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4953 anonymous: bool = False, 4954 optional_parens: bool = True, 4955 any_token: bool = False, 4956 ) -> t.Optional[exp.Expression]: 4957 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4958 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4959 fn_syntax = False 4960 if ( 4961 self._match(TokenType.L_BRACE, advance=False) 4962 and self._next 4963 and 
self._next.text.upper() == "FN" 4964 ): 4965 self._advance(2) 4966 fn_syntax = True 4967 4968 func = self._parse_function_call( 4969 functions=functions, 4970 anonymous=anonymous, 4971 optional_parens=optional_parens, 4972 any_token=any_token, 4973 ) 4974 4975 if fn_syntax: 4976 self._match(TokenType.R_BRACE) 4977 4978 return func 4979 4980 def _parse_function_call( 4981 self, 4982 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4983 anonymous: bool = False, 4984 optional_parens: bool = True, 4985 any_token: bool = False, 4986 ) -> t.Optional[exp.Expression]: 4987 if not self._curr: 4988 return None 4989 4990 comments = self._curr.comments 4991 token_type = self._curr.token_type 4992 this = self._curr.text 4993 upper = this.upper() 4994 4995 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4996 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4997 self._advance() 4998 return self._parse_window(parser(self)) 4999 5000 if not self._next or self._next.token_type != TokenType.L_PAREN: 5001 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5002 self._advance() 5003 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5004 5005 return None 5006 5007 if any_token: 5008 if token_type in self.RESERVED_TOKENS: 5009 return None 5010 elif token_type not in self.FUNC_TOKENS: 5011 return None 5012 5013 self._advance(2) 5014 5015 parser = self.FUNCTION_PARSERS.get(upper) 5016 if parser and not anonymous: 5017 this = parser(self) 5018 else: 5019 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5020 5021 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5022 this = self.expression(subquery_predicate, this=self._parse_select()) 5023 self._match_r_paren() 5024 return this 5025 5026 if functions is None: 5027 functions = self.FUNCTIONS 5028 5029 function = functions.get(upper) 5030 5031 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5032 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5033 5034 if alias: 5035 args = self._kv_to_prop_eq(args) 5036 5037 if function and not anonymous: 5038 if "dialect" in function.__code__.co_varnames: 5039 func = function(args, dialect=self.dialect) 5040 else: 5041 func = function(args) 5042 5043 func = self.validate_expression(func, args) 5044 if not self.dialect.NORMALIZE_FUNCTIONS: 5045 func.meta["name"] = this 5046 5047 this = func 5048 else: 5049 if token_type == TokenType.IDENTIFIER: 5050 this = exp.Identifier(this=this, quoted=True) 5051 this = self.expression(exp.Anonymous, this=this, expressions=args) 5052 5053 if isinstance(this, exp.Expression): 5054 this.add_comments(comments) 5055 5056 self._match_r_paren(this) 5057 return self._parse_window(this) 5058 5059 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5060 transformed = [] 5061 5062 for e in expressions: 5063 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5064 if isinstance(e, exp.Alias): 5065 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5066 5067 if not isinstance(e, exp.PropertyEQ): 5068 e = self.expression( 5069 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5070 ) 5071 5072 if isinstance(e.this, exp.Column): 5073 e.this.replace(e.this.this) 5074 5075 transformed.append(e) 5076 5077 return transformed 5078 5079 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5080 return self._parse_column_def(self._parse_id_var()) 5081 5082 def _parse_user_defined_function( 
5083 self, kind: t.Optional[TokenType] = None 5084 ) -> t.Optional[exp.Expression]: 5085 this = self._parse_id_var() 5086 5087 while self._match(TokenType.DOT): 5088 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5089 5090 if not self._match(TokenType.L_PAREN): 5091 return this 5092 5093 expressions = self._parse_csv(self._parse_function_parameter) 5094 self._match_r_paren() 5095 return self.expression( 5096 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5097 ) 5098 5099 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5100 literal = self._parse_primary() 5101 if literal: 5102 return self.expression(exp.Introducer, this=token.text, expression=literal) 5103 5104 return self.expression(exp.Identifier, this=token.text) 5105 5106 def _parse_session_parameter(self) -> exp.SessionParameter: 5107 kind = None 5108 this = self._parse_id_var() or self._parse_primary() 5109 5110 if this and self._match(TokenType.DOT): 5111 kind = this.name 5112 this = self._parse_var() or self._parse_primary() 5113 5114 return self.expression(exp.SessionParameter, this=this, kind=kind) 5115 5116 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5117 return self._parse_id_var() 5118 5119 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5120 index = self._index 5121 5122 if self._match(TokenType.L_PAREN): 5123 expressions = t.cast( 5124 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5125 ) 5126 5127 if not self._match(TokenType.R_PAREN): 5128 self._retreat(index) 5129 else: 5130 expressions = [self._parse_lambda_arg()] 5131 5132 if self._match_set(self.LAMBDAS): 5133 return self.LAMBDAS[self._prev.token_type](self, expressions) 5134 5135 self._retreat(index) 5136 5137 this: t.Optional[exp.Expression] 5138 5139 if self._match(TokenType.DISTINCT): 5140 this = self.expression( 5141 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5142 ) 5143 else: 5144 this = self._parse_select_or_expression(alias=alias) 5145 5146 return self._parse_limit( 5147 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5148 ) 5149 5150 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5151 index = self._index 5152 if not self._match(TokenType.L_PAREN): 5153 return this 5154 5155 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5156 # expr can be of both types 5157 if self._match_set(self.SELECT_START_TOKENS): 5158 self._retreat(index) 5159 return this 5160 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5161 self._match_r_paren() 5162 return self.expression(exp.Schema, this=this, expressions=args) 5163 5164 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5165 return self._parse_column_def(self._parse_field(any_token=True)) 5166 5167 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5168 # column defs are not really columns, they're identifiers 5169 if isinstance(this, exp.Column): 5170 this = this.this 5171 5172 kind = self._parse_types(schema=True) 5173 5174 if self._match_text_seq("FOR", "ORDINALITY"): 5175 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5176 5177 constraints: t.List[exp.Expression] = [] 5178 5179 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5180 ("ALIAS", "MATERIALIZED") 5181 ): 5182 persisted = self._prev.text.upper() == "MATERIALIZED" 5183 constraints.append( 5184 self.expression( 5185 exp.ComputedColumnConstraint, 5186 this=self._parse_assignment(), 5187 persisted=persisted or self._match_text_seq("PERSISTED"), 5188 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5189 ) 5190 ) 5191 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5192 self._match(TokenType.ALIAS) 5193 constraints.append( 5194 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5195 ) 5196 5197 while True: 5198 constraint = self._parse_column_constraint() 5199 if not constraint: 5200 break 5201 constraints.append(constraint) 5202 5203 if not kind and not constraints: 5204 return this 5205 5206 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5207 5208 def _parse_auto_increment( 5209 self, 5210 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5211 start = None 5212 increment = None 5213 5214 if self._match(TokenType.L_PAREN, advance=False): 5215 args = self._parse_wrapped_csv(self._parse_bitwise) 5216 start = seq_get(args, 0) 5217 increment = seq_get(args, 1) 5218 elif self._match_text_seq("START"): 5219 start = self._parse_bitwise() 5220 self._match_text_seq("INCREMENT") 5221 increment = self._parse_bitwise() 5222 5223 if start and increment: 5224 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5225 5226 return exp.AutoIncrementColumnConstraint() 5227 5228 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5229 if not self._match_text_seq("REFRESH"): 5230 self._retreat(self._index - 1) 5231 return None 5232 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5233 5234 def _parse_compress(self) -> exp.CompressColumnConstraint: 5235 if self._match(TokenType.L_PAREN, advance=False): 5236 return self.expression( 5237 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5238 ) 5239 5240 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5241 5242 def _parse_generated_as_identity( 5243 self, 5244 ) -> ( 5245 exp.GeneratedAsIdentityColumnConstraint 5246 | exp.ComputedColumnConstraint 5247 | exp.GeneratedAsRowColumnConstraint 5248 ): 5249 if self._match_text_seq("BY", "DEFAULT"): 5250 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5251 this = self.expression( 5252 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5253 ) 5254 else: 5255 self._match_text_seq("ALWAYS") 5256 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5257 5258 self._match(TokenType.ALIAS) 5259 5260 if self._match_text_seq("ROW"): 5261 start = self._match_text_seq("START") 5262 if not start: 5263 self._match(TokenType.END) 5264 hidden = self._match_text_seq("HIDDEN") 5265 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5266 5267 identity = self._match_text_seq("IDENTITY") 5268 5269 if self._match(TokenType.L_PAREN): 5270 if self._match(TokenType.START_WITH): 5271 this.set("start", self._parse_bitwise()) 5272 if self._match_text_seq("INCREMENT", "BY"): 5273 this.set("increment", self._parse_bitwise()) 5274 if self._match_text_seq("MINVALUE"): 5275 this.set("minvalue", self._parse_bitwise()) 5276 if self._match_text_seq("MAXVALUE"): 5277 this.set("maxvalue", self._parse_bitwise()) 5278 5279 if self._match_text_seq("CYCLE"): 5280 this.set("cycle", True) 5281 elif self._match_text_seq("NO", "CYCLE"): 5282 this.set("cycle", False) 5283 5284 if not identity: 5285 this.set("expression", self._parse_range()) 5286 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5287 args = self._parse_csv(self._parse_bitwise) 5288 this.set("start", seq_get(args, 0)) 5289 this.set("increment", seq_get(args, 1)) 5290 5291 self._match_r_paren() 5292 5293 return this 5294 5295 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5296 self._match_text_seq("LENGTH") 5297 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5298 5299 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5300 if self._match_text_seq("NULL"): 5301 return self.expression(exp.NotNullColumnConstraint) 5302 if self._match_text_seq("CASESPECIFIC"): 5303 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5304 if self._match_text_seq("FOR", "REPLICATION"): 5305 return self.expression(exp.NotForReplicationColumnConstraint) 5306 return None 5307 5308 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5309 if self._match(TokenType.CONSTRAINT): 5310 this = self._parse_id_var() 5311 else: 5312 this = None 5313 5314 if self._match_texts(self.CONSTRAINT_PARSERS): 5315 return self.expression( 5316 exp.ColumnConstraint, 5317 this=this, 5318 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5319 ) 5320 5321 return this 5322 5323 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5324 if not self._match(TokenType.CONSTRAINT): 5325 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5326 5327 return self.expression( 5328 exp.Constraint, 5329 this=self._parse_id_var(), 5330 expressions=self._parse_unnamed_constraints(), 5331 ) 5332 5333 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5334 constraints = [] 5335 while True: 5336 constraint = self._parse_unnamed_constraint() or self._parse_function() 5337 if not constraint: 5338 break 5339 constraints.append(constraint) 5340 5341 return constraints 5342 5343 def _parse_unnamed_constraint( 5344 self, constraints: t.Optional[t.Collection[str]] = None 5345 ) -> t.Optional[exp.Expression]: 5346 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5347 constraints or self.CONSTRAINT_PARSERS 5348 ): 5349 return None 5350 5351 constraint = self._prev.text.upper() 5352 if constraint not in self.CONSTRAINT_PARSERS: 5353 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5354 5355 return self.CONSTRAINT_PARSERS[constraint](self) 5356 5357 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5358 return self._parse_id_var(any_token=False) 5359 5360 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5361 self._match_text_seq("KEY") 5362 return self.expression( 5363 exp.UniqueColumnConstraint, 5364 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5365 this=self._parse_schema(self._parse_unique_key()), 5366 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5367 on_conflict=self._parse_on_conflict(), 5368 ) 5369 5370 def _parse_key_constraint_options(self) -> t.List[str]: 5371 options = [] 5372 while True: 5373 if not self._curr: 5374 break 5375 5376 if self._match(TokenType.ON): 5377 action = None 5378 on = self._advance_any() and self._prev.text 5379 5380 if self._match_text_seq("NO", "ACTION"): 5381 action = "NO ACTION" 5382 elif self._match_text_seq("CASCADE"): 5383 action = "CASCADE" 5384 elif self._match_text_seq("RESTRICT"): 5385 action = "RESTRICT" 5386 elif self._match_pair(TokenType.SET, TokenType.NULL): 5387 action = "SET NULL" 5388 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5389 action = "SET DEFAULT" 5390 else: 5391 self.raise_error("Invalid key constraint") 5392 5393 options.append(f"ON {on} {action}") 5394 else: 5395 var = self._parse_var_from_options( 5396 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5397 ) 5398 if not var: 5399 break 5400 options.append(var.name) 5401 5402 return options 5403 5404 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5405 if match and not self._match(TokenType.REFERENCES): 5406 return None 5407 5408 expressions = None 5409 this = self._parse_table(schema=True) 5410 options = self._parse_key_constraint_options() 5411 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5412 5413 def _parse_foreign_key(self) -> exp.ForeignKey: 5414 expressions = self._parse_wrapped_id_vars() 5415 reference = self._parse_references() 5416 options = {} 5417 5418 while self._match(TokenType.ON): 5419 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5420 self.raise_error("Expected DELETE or UPDATE") 5421 5422 kind = self._prev.text.lower() 5423 5424 if self._match_text_seq("NO", "ACTION"): 5425 action = "NO ACTION" 5426 elif self._match(TokenType.SET): 5427 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5428 action = "SET " + self._prev.text.upper() 5429 else: 5430 self._advance() 5431 action = self._prev.text.upper() 5432 5433 options[kind] = action 5434 5435 return self.expression( 5436 exp.ForeignKey, 5437 expressions=expressions, 5438 reference=reference, 5439 **options, # type: ignore 5440 ) 5441 5442 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5443 return self._parse_field() 5444 5445 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5446 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5447 self._retreat(self._index - 1) 5448 return None 5449 5450 id_vars = self._parse_wrapped_id_vars() 5451 return self.expression( 5452 exp.PeriodForSystemTimeConstraint, 5453 this=seq_get(id_vars, 0), 5454 expression=seq_get(id_vars, 1), 5455 ) 5456 5457 def _parse_primary_key( 5458 self, wrapped_optional: bool = False, in_props: bool = False 5459 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5460 desc = ( 5461 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5462 and self._prev.token_type == TokenType.DESC 5463 ) 5464 5465 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5466 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5467 5468 expressions = self._parse_wrapped_csv( 5469 self._parse_primary_key_part, optional=wrapped_optional 5470 ) 5471 options = self._parse_key_constraint_options() 5472 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5473 5474 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5475 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5476 5477 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5478 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5479 return this 5480 5481 bracket_kind = self._prev.token_type 5482 expressions = self._parse_csv( 5483 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5484 ) 5485 5486 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5487 self.raise_error("Expected ]") 5488 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5489 self.raise_error("Expected }") 5490 5491 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5492 if bracket_kind == TokenType.L_BRACE: 5493 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5494 elif not this: 5495 this = build_array_constructor( 5496 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5497 ) 5498 else: 5499 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5500 if constructor_type: 5501 return build_array_constructor( 5502 constructor_type, 5503 args=expressions, 5504 bracket_kind=bracket_kind, 5505 dialect=self.dialect, 5506 ) 5507 5508 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5509 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5510 5511 self._add_comments(this) 5512 return self._parse_bracket(this) 5513 5514 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5515 if self._match(TokenType.COLON): 5516 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5517 return this 5518 5519 def _parse_case(self) -> t.Optional[exp.Expression]: 5520 ifs = [] 5521 default = None 5522 5523 comments = self._prev_comments 5524 expression = self._parse_assignment() 5525 5526 while self._match(TokenType.WHEN): 5527 this = self._parse_assignment() 5528 self._match(TokenType.THEN) 5529 then = self._parse_assignment() 5530 ifs.append(self.expression(exp.If, this=this, true=then)) 5531 5532 if self._match(TokenType.ELSE): 5533 default = self._parse_assignment() 5534 5535 if not self._match(TokenType.END): 5536 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5537 default = exp.column("interval") 5538 else: 5539 self.raise_error("Expected END after CASE", self._prev) 5540 5541 return self.expression( 5542 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5543 ) 5544 5545 def _parse_if(self) -> t.Optional[exp.Expression]: 5546 if self._match(TokenType.L_PAREN): 5547 args = self._parse_csv(self._parse_assignment) 5548 this = self.validate_expression(exp.If.from_arg_list(args), args) 5549 self._match_r_paren() 5550 else: 5551 index = self._index - 1 5552 5553 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5554 return self._parse_as_command(self._prev) 5555 5556 condition = self._parse_assignment() 5557 5558 if not condition: 5559 self._retreat(index) 5560 return None 5561 5562 self._match(TokenType.THEN) 5563 true = self._parse_assignment() 5564 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5565 self._match(TokenType.END) 5566 this = self.expression(exp.If, this=condition, true=true, false=false) 5567 5568 return this 5569 5570 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5571 if not self._match_text_seq("VALUE", "FOR"): 5572 self._retreat(self._index - 1) 5573 return None 5574 5575 return self.expression( 5576 exp.NextValueFor, 5577 this=self._parse_column(), 5578 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5579 ) 5580 5581 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5582 this = self._parse_function() or self._parse_var_or_string(upper=True) 5583 5584 if self._match(TokenType.FROM): 5585 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5586 5587 if not self._match(TokenType.COMMA): 5588 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5589 5590 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5591 5592 def _parse_gap_fill(self) -> exp.GapFill: 5593 self._match(TokenType.TABLE) 5594 this = self._parse_table() 5595 5596 self._match(TokenType.COMMA) 5597 args = [this, *self._parse_csv(self._parse_lambda)] 5598 5599 gap_fill = exp.GapFill.from_arg_list(args) 5600 return self.validate_expression(gap_fill, args) 5601 5602 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5603 this = self._parse_assignment() 5604 5605 if not self._match(TokenType.ALIAS): 5606 if self._match(TokenType.COMMA): 5607 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5608 5609 self.raise_error("Expected AS after CAST") 5610 5611 fmt = None 5612 to = self._parse_types() 5613 5614 if self._match(TokenType.FORMAT): 5615 fmt_string = self._parse_string() 5616 fmt = self._parse_at_time_zone(fmt_string) 5617 5618 if not to: 5619 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5620 if to.this in exp.DataType.TEMPORAL_TYPES: 5621 this = self.expression( 5622 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5623 this=this, 5624 format=exp.Literal.string( 5625 format_time( 5626 fmt_string.this if fmt_string else "", 5627 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5628 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5629 ) 5630 ), 5631 safe=safe, 5632 ) 5633 5634 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5635 this.set("zone", fmt.args["zone"]) 5636 return this 5637 elif not to: 5638 self.raise_error("Expected TYPE after CAST") 5639 elif isinstance(to, exp.Identifier): 5640 to = exp.DataType.build(to.name, udt=True) 5641 elif to.this == exp.DataType.Type.CHAR: 5642 if self._match(TokenType.CHARACTER_SET): 5643 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5644 5645 return self.expression( 5646 exp.Cast if strict else exp.TryCast, 5647 this=this, 5648 to=to, 5649 format=fmt, 5650 safe=safe, 5651 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5652 ) 5653 5654 def _parse_string_agg(self) -> exp.Expression: 5655 if self._match(TokenType.DISTINCT): 5656 args: t.List[t.Optional[exp.Expression]] = [ 5657 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5658 ] 5659 if self._match(TokenType.COMMA): 5660 args.extend(self._parse_csv(self._parse_assignment)) 5661 else: 5662 args = self._parse_csv(self._parse_assignment) # type: ignore 5663 5664 index = self._index 5665 if not self._match(TokenType.R_PAREN) and args: 5666 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5667 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5668 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5669 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5670 5671 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5672 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5673 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5674 if not self._match_text_seq("WITHIN", "GROUP"): 5675 self._retreat(index) 5676 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5677 5678 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5679 order = self._parse_order(this=seq_get(args, 0)) 5680 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5681 5682 def _parse_convert( 5683 self, strict: bool, safe: t.Optional[bool] = None 5684 ) -> t.Optional[exp.Expression]: 5685 this = self._parse_bitwise() 5686 5687 if self._match(TokenType.USING): 5688 to: t.Optional[exp.Expression] = self.expression( 5689 exp.CharacterSet, this=self._parse_var() 5690 ) 5691 elif self._match(TokenType.COMMA): 5692 to = self._parse_types() 5693 else: 5694 to = None 5695 5696 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5697 5698 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5699 """ 5700 There are generally two variants of the DECODE function: 5701 5702 - DECODE(bin, charset) 5703 - DECODE(expression, search, result [, search, result] ... [, default]) 5704 5705 The second variant will always be parsed into a CASE expression. Note that NULL 5706 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5707 instead of relying on pattern matching. 
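For example, DECODE(x, 1, 'one', 'other') is parsed into the equivalent of CASE WHEN x = 1 THEN 'one' ELSE 'other' END.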
5708 """ 5709 args = self._parse_csv(self._parse_assignment) 5710 5711 if len(args) < 3: 5712 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5713 5714 expression, *expressions = args 5715 if not expression: 5716 return None 5717 5718 ifs = [] 5719 for search, result in zip(expressions[::2], expressions[1::2]): 5720 if not search or not result: 5721 return None 5722 5723 if isinstance(search, exp.Literal): 5724 ifs.append( 5725 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5726 ) 5727 elif isinstance(search, exp.Null): 5728 ifs.append( 5729 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5730 ) 5731 else: 5732 cond = exp.or_( 5733 exp.EQ(this=expression.copy(), expression=search), 5734 exp.and_( 5735 exp.Is(this=expression.copy(), expression=exp.Null()), 5736 exp.Is(this=search.copy(), expression=exp.Null()), 5737 copy=False, 5738 ), 5739 copy=False, 5740 ) 5741 ifs.append(exp.If(this=cond, true=result)) 5742 5743 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5744 5745 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5746 self._match_text_seq("KEY") 5747 key = self._parse_column() 5748 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5749 self._match_text_seq("VALUE") 5750 value = self._parse_bitwise() 5751 5752 if not key and not value: 5753 return None 5754 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5755 5756 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5757 if not this or not self._match_text_seq("FORMAT", "JSON"): 5758 return this 5759 5760 return self.expression(exp.FormatJson, this=this) 5761 5762 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5763 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5764 for value in values: 5765 if self._match_text_seq(value, "ON", on): 5766 return f"{value} ON {on}" 5767 5768 return None 5769 5770 @t.overload 5771 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5772 5773 @t.overload 5774 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5775 5776 def _parse_json_object(self, agg=False): 5777 star = self._parse_star() 5778 expressions = ( 5779 [star] 5780 if star 5781 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5782 ) 5783 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5784 5785 unique_keys = None 5786 if self._match_text_seq("WITH", "UNIQUE"): 5787 unique_keys = True 5788 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5789 unique_keys = False 5790 5791 self._match_text_seq("KEYS") 5792 5793 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5794 self._parse_type() 5795 ) 5796 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5797 5798 return self.expression( 5799 exp.JSONObjectAgg if agg else exp.JSONObject, 5800 expressions=expressions, 5801 null_handling=null_handling, 5802 unique_keys=unique_keys, 5803 return_type=return_type, 5804 encoding=encoding, 5805 ) 5806 5807 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5808 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5809 if not self._match_text_seq("NESTED"): 5810 this = self._parse_id_var() 5811 kind = self._parse_types(allow_identifiers=False) 5812 nested = None 5813 else: 5814 this = None 5815 kind = None 5816 nested = True 5817 5818 path = self._match_text_seq("PATH") and self._parse_string() 5819 nested_schema = nested and self._parse_json_schema() 5820 5821 return self.expression( 5822 exp.JSONColumnDef, 5823 this=this, 5824 kind=kind, 5825 path=path, 5826 nested_schema=nested_schema, 5827 ) 5828 5829 def _parse_json_schema(self) -> exp.JSONSchema: 5830 self._match_text_seq("COLUMNS") 5831 return self.expression( 5832 exp.JSONSchema, 5833 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5834 ) 5835 5836 def _parse_json_table(self) -> exp.JSONTable: 5837 this = self._parse_format_json(self._parse_bitwise()) 5838 path = self._match(TokenType.COMMA) and self._parse_string() 5839 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5840 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5841 schema = self._parse_json_schema() 5842 5843 return exp.JSONTable( 5844 this=this, 5845 schema=schema, 5846 path=path, 5847 error_handling=error_handling, 5848 empty_handling=empty_handling, 5849 ) 5850 5851 def _parse_match_against(self) -> exp.MatchAgainst: 5852 expressions = self._parse_csv(self._parse_column) 5853 5854 self._match_text_seq(")", "AGAINST", "(") 5855 5856 this = self._parse_string() 5857 5858 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5859 modifier = "IN NATURAL LANGUAGE MODE" 5860 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5861 modifier = f"{modifier} WITH QUERY EXPANSION" 5862 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5863 modifier = "IN BOOLEAN MODE" 5864 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5865 modifier = "WITH QUERY EXPANSION" 5866 else: 5867 modifier = None 5868 5869 return self.expression( 5870 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5871 ) 5872 5873 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5874 def _parse_open_json(self) -> exp.OpenJSON: 5875 this = self._parse_bitwise() 5876 path = self._match(TokenType.COMMA) and self._parse_string() 5877 5878 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5879 this = self._parse_field(any_token=True) 5880 kind = self._parse_types() 5881 path = 
self._parse_string() 5882 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5883 5884 return self.expression( 5885 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5886 ) 5887 5888 expressions = None 5889 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5890 self._match_l_paren() 5891 expressions = self._parse_csv(_parse_open_json_column_def) 5892 5893 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5894 5895 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5896 args = self._parse_csv(self._parse_bitwise) 5897 5898 if self._match(TokenType.IN): 5899 return self.expression( 5900 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5901 ) 5902 5903 if haystack_first: 5904 haystack = seq_get(args, 0) 5905 needle = seq_get(args, 1) 5906 else: 5907 needle = seq_get(args, 0) 5908 haystack = seq_get(args, 1) 5909 5910 return self.expression( 5911 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5912 ) 5913 5914 def _parse_predict(self) -> exp.Predict: 5915 self._match_text_seq("MODEL") 5916 this = self._parse_table() 5917 5918 self._match(TokenType.COMMA) 5919 self._match_text_seq("TABLE") 5920 5921 return self.expression( 5922 exp.Predict, 5923 this=this, 5924 expression=self._parse_table(), 5925 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5926 ) 5927 5928 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5929 args = self._parse_csv(self._parse_table) 5930 return exp.JoinHint(this=func_name.upper(), expressions=args) 5931 5932 def _parse_substring(self) -> exp.Substring: 5933 # Postgres supports the form: substring(string [from int] [for int]) 5934 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5935 5936 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5937 5938 if self._match(TokenType.FROM): 5939 args.append(self._parse_bitwise()) 5940 if self._match(TokenType.FOR): 5941 if len(args) == 1: 5942 args.append(exp.Literal.number(1)) 5943 args.append(self._parse_bitwise()) 5944 5945 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5946 5947 def _parse_trim(self) -> exp.Trim: 5948 # https://www.w3resource.com/sql/character-functions/trim.php 5949 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5950 5951 position = None 5952 collation = None 5953 expression = None 5954 5955 if self._match_texts(self.TRIM_TYPES): 5956 position = self._prev.text.upper() 5957 5958 this = self._parse_bitwise() 5959 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5960 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5961 expression = self._parse_bitwise() 5962 5963 if invert_order: 5964 this, expression = expression, this 5965 5966 if self._match(TokenType.COLLATE): 5967 collation = self._parse_bitwise() 5968 5969 return self.expression( 5970 exp.Trim, this=this, position=position, expression=expression, collation=collation 5971 ) 5972 5973 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5974 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5975 5976 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5977 return self._parse_window(self._parse_id_var(), alias=True) 5978 5979 def _parse_respect_or_ignore_nulls( 5980 self, this: t.Optional[exp.Expression] 5981 ) -> t.Optional[exp.Expression]: 5982 if self._match_text_seq("IGNORE", "NULLS"): 
5983 return self.expression(exp.IgnoreNulls, this=this) 5984 if self._match_text_seq("RESPECT", "NULLS"): 5985 return self.expression(exp.RespectNulls, this=this) 5986 return this 5987 5988 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5989 if self._match(TokenType.HAVING): 5990 self._match_texts(("MAX", "MIN")) 5991 max = self._prev.text.upper() != "MIN" 5992 return self.expression( 5993 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5994 ) 5995 5996 return this 5997 5998 def _parse_window( 5999 self, this: t.Optional[exp.Expression], alias: bool = False 6000 ) -> t.Optional[exp.Expression]: 6001 func = this 6002 comments = func.comments if isinstance(func, exp.Expression) else None 6003 6004 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6005 self._match(TokenType.WHERE) 6006 this = self.expression( 6007 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6008 ) 6009 self._match_r_paren() 6010 6011 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6012 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6013 if self._match_text_seq("WITHIN", "GROUP"): 6014 order = self._parse_wrapped(self._parse_order) 6015 this = self.expression(exp.WithinGroup, this=this, expression=order) 6016 6017 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6018 # Some dialects choose to implement and some do not. 6019 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6020 6021 # There is some code above in _parse_lambda that handles 6022 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6023 6024 # The below changes handle 6025 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6026 6027 # Oracle allows both formats 6028 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6029 # and Snowflake chose to do the same for familiarity 6030 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6031 if isinstance(this, exp.AggFunc): 6032 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6033 6034 if ignore_respect and ignore_respect is not this: 6035 ignore_respect.replace(ignore_respect.this) 6036 this = self.expression(ignore_respect.__class__, this=this) 6037 6038 this = self._parse_respect_or_ignore_nulls(this) 6039 6040 # bigquery select from window x AS (partition by ...) 
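        # e.g. SELECT ... FROM t WINDOW w AS (PARTITION BY a ORDER BY b); when entered
        # via _parse_named_window (alias=True), the window name has already been parsed
        # into `this`, so only the AS keyword and the parenthesized spec remain here.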
6041 if alias: 6042 over = None 6043 self._match(TokenType.ALIAS) 6044 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6045 return this 6046 else: 6047 over = self._prev.text.upper() 6048 6049 if comments and isinstance(func, exp.Expression): 6050 func.pop_comments() 6051 6052 if not self._match(TokenType.L_PAREN): 6053 return self.expression( 6054 exp.Window, 6055 comments=comments, 6056 this=this, 6057 alias=self._parse_id_var(False), 6058 over=over, 6059 ) 6060 6061 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6062 6063 first = self._match(TokenType.FIRST) 6064 if self._match_text_seq("LAST"): 6065 first = False 6066 6067 partition, order = self._parse_partition_and_order() 6068 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6069 6070 if kind: 6071 self._match(TokenType.BETWEEN) 6072 start = self._parse_window_spec() 6073 self._match(TokenType.AND) 6074 end = self._parse_window_spec() 6075 6076 spec = self.expression( 6077 exp.WindowSpec, 6078 kind=kind, 6079 start=start["value"], 6080 start_side=start["side"], 6081 end=end["value"], 6082 end_side=end["side"], 6083 ) 6084 else: 6085 spec = None 6086 6087 self._match_r_paren() 6088 6089 window = self.expression( 6090 exp.Window, 6091 comments=comments, 6092 this=this, 6093 partition_by=partition, 6094 order=order, 6095 spec=spec, 6096 alias=window_alias, 6097 over=over, 6098 first=first, 6099 ) 6100 6101 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6102 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6103 return self._parse_window(window, alias=alias) 6104 6105 return window 6106 6107 def _parse_partition_and_order( 6108 self, 6109 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6110 return self._parse_partition_by(), self._parse_order() 6111 6112 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6113 self._match(TokenType.BETWEEN) 6114 6115 return { 6116 "value": ( 6117 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6118 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6119 or self._parse_bitwise() 6120 ), 6121 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6122 } 6123 6124 def _parse_alias( 6125 self, this: t.Optional[exp.Expression], explicit: bool = False 6126 ) -> t.Optional[exp.Expression]: 6127 any_token = self._match(TokenType.ALIAS) 6128 comments = self._prev_comments or [] 6129 6130 if explicit and not any_token: 6131 return this 6132 6133 if self._match(TokenType.L_PAREN): 6134 aliases = self.expression( 6135 exp.Aliases, 6136 comments=comments, 6137 this=this, 6138 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6139 ) 6140 self._match_r_paren(aliases) 6141 return aliases 6142 6143 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6144 self.STRING_ALIASES and self._parse_string_as_identifier() 6145 ) 6146 6147 if alias: 6148 comments.extend(alias.pop_comments()) 6149 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6150 column = this.this 6151 6152 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6153 if not this.comments and column and column.comments: 6154 this.comments = column.pop_comments() 6155 6156 return this 6157 6158 def _parse_id_var( 6159 self, 6160 any_token: bool = True, 6161 tokens: t.Optional[t.Collection[TokenType]] = None, 6162 ) -> t.Optional[exp.Expression]: 6163 expression = self._parse_identifier() 6164 if 
not expression and ( 6165 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6166 ): 6167 quoted = self._prev.token_type == TokenType.STRING 6168 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6169 6170 return expression 6171 6172 def _parse_string(self) -> t.Optional[exp.Expression]: 6173 if self._match_set(self.STRING_PARSERS): 6174 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6175 return self._parse_placeholder() 6176 6177 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6178 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6179 6180 def _parse_number(self) -> t.Optional[exp.Expression]: 6181 if self._match_set(self.NUMERIC_PARSERS): 6182 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6183 return self._parse_placeholder() 6184 6185 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6186 if self._match(TokenType.IDENTIFIER): 6187 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6188 return self._parse_placeholder() 6189 6190 def _parse_var( 6191 self, 6192 any_token: bool = False, 6193 tokens: t.Optional[t.Collection[TokenType]] = None, 6194 upper: bool = False, 6195 ) -> t.Optional[exp.Expression]: 6196 if ( 6197 (any_token and self._advance_any()) 6198 or self._match(TokenType.VAR) 6199 or (self._match_set(tokens) if tokens else False) 6200 ): 6201 return self.expression( 6202 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6203 ) 6204 return self._parse_placeholder() 6205 6206 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6207 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6208 self._advance() 6209 return self._prev 6210 return None 6211 6212 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6213 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6214 6215 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6216 return self._parse_primary() or self._parse_var(any_token=True) 6217 6218 def _parse_null(self) -> t.Optional[exp.Expression]: 6219 if self._match_set(self.NULL_TOKENS): 6220 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6221 return self._parse_placeholder() 6222 6223 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6224 if self._match(TokenType.TRUE): 6225 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6226 if self._match(TokenType.FALSE): 6227 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6228 return self._parse_placeholder() 6229 6230 def _parse_star(self) -> t.Optional[exp.Expression]: 6231 if self._match(TokenType.STAR): 6232 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6233 return self._parse_placeholder() 6234 6235 def _parse_parameter(self) -> exp.Parameter: 6236 this = self._parse_identifier() or self._parse_primary_or_var() 6237 return self.expression(exp.Parameter, this=this) 6238 6239 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6240 if self._match_set(self.PLACEHOLDER_PARSERS): 6241 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6242 if placeholder: 6243 return placeholder 6244 self._advance(-1) 6245 return None 6246 6247 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6248 if not self._match_texts(keywords): 6249 return None 6250 if self._match(TokenType.L_PAREN, 
advance=False): 6251 return self._parse_wrapped_csv(self._parse_expression) 6252 6253 expression = self._parse_expression() 6254 return [expression] if expression else None 6255 6256 def _parse_csv( 6257 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6258 ) -> t.List[exp.Expression]: 6259 parse_result = parse_method() 6260 items = [parse_result] if parse_result is not None else [] 6261 6262 while self._match(sep): 6263 self._add_comments(parse_result) 6264 parse_result = parse_method() 6265 if parse_result is not None: 6266 items.append(parse_result) 6267 6268 return items 6269 6270 def _parse_tokens( 6271 self, parse_method: t.Callable, expressions: t.Dict 6272 ) -> t.Optional[exp.Expression]: 6273 this = parse_method() 6274 6275 while self._match_set(expressions): 6276 this = self.expression( 6277 expressions[self._prev.token_type], 6278 this=this, 6279 comments=self._prev_comments, 6280 expression=parse_method(), 6281 ) 6282 6283 return this 6284 6285 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6286 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6287 6288 def _parse_wrapped_csv( 6289 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6290 ) -> t.List[exp.Expression]: 6291 return self._parse_wrapped( 6292 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6293 ) 6294 6295 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6296 wrapped = self._match(TokenType.L_PAREN) 6297 if not wrapped and not optional: 6298 self.raise_error("Expecting (") 6299 parse_result = parse_method() 6300 if wrapped: 6301 self._match_r_paren() 6302 return parse_result 6303 6304 def _parse_expressions(self) -> t.List[exp.Expression]: 6305 return self._parse_csv(self._parse_expression) 6306 6307 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6308 return self._parse_select() or self._parse_set_operations( 6309 self._parse_expression() if alias else self._parse_assignment() 6310 ) 6311 6312 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6313 return self._parse_query_modifiers( 6314 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6315 ) 6316 6317 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6318 this = None 6319 if self._match_texts(self.TRANSACTION_KIND): 6320 this = self._prev.text 6321 6322 self._match_texts(("TRANSACTION", "WORK")) 6323 6324 modes = [] 6325 while True: 6326 mode = [] 6327 while self._match(TokenType.VAR): 6328 mode.append(self._prev.text) 6329 6330 if mode: 6331 modes.append(" ".join(mode)) 6332 if not self._match(TokenType.COMMA): 6333 break 6334 6335 return self.expression(exp.Transaction, this=this, modes=modes) 6336 6337 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6338 chain = None 6339 savepoint = None 6340 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6341 6342 self._match_texts(("TRANSACTION", "WORK")) 6343 6344 if self._match_text_seq("TO"): 6345 self._match_text_seq("SAVEPOINT") 6346 savepoint = self._parse_id_var() 6347 6348 if self._match(TokenType.AND): 6349 chain = not self._match_text_seq("NO") 6350 self._match_text_seq("CHAIN") 6351 6352 if is_rollback: 6353 return self.expression(exp.Rollback, savepoint=savepoint) 6354 6355 return self.expression(exp.Commit, chain=chain) 6356 6357 def _parse_refresh(self) -> exp.Refresh: 6358 self._match(TokenType.TABLE) 6359 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6360 6361 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6362 if not self._match_text_seq("ADD"): 6363 return None 6364 6365 self._match(TokenType.COLUMN) 6366 exists_column = self._parse_exists(not_=True) 6367 expression = self._parse_field_def() 6368 6369 if expression: 6370 expression.set("exists", exists_column) 6371 6372 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6373 if self._match_texts(("FIRST", "AFTER")): 6374 position = self._prev.text 6375 column_position = self.expression( 6376 exp.ColumnPosition, this=self._parse_column(), position=position 6377 ) 6378 expression.set("position", column_position) 6379 6380 return expression 6381 6382 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6383 drop = self._match(TokenType.DROP) and self._parse_drop() 6384 if drop and not isinstance(drop, exp.Command): 6385 drop.set("kind", drop.args.get("kind", "COLUMN")) 6386 return drop 6387 6388 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6389 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6390 return self.expression( 6391 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6392 ) 6393 6394 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6395 index = self._index - 1 6396 6397 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6398 return self._parse_csv( 6399 lambda: self.expression( 6400 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6401 ) 6402 ) 6403 6404 self._retreat(index) 6405 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6406 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6407 6408 if self._match_text_seq("ADD", "COLUMNS"): 6409 schema = self._parse_schema() 6410 if schema: 6411 return [schema] 6412 return [] 6413 6414 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6415 6416 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6417 if self._match_texts(self.ALTER_ALTER_PARSERS): 6418 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6419 6420 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6421 # keyword after ALTER we default to parsing this statement 6422 self._match(TokenType.COLUMN) 6423 column = self._parse_field(any_token=True) 6424 6425 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6426 return self.expression(exp.AlterColumn, this=column, drop=True) 6427 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6428 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6429 if self._match(TokenType.COMMENT): 6430 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6431 if self._match_text_seq("DROP", "NOT", "NULL"): 6432 return self.expression( 6433 exp.AlterColumn, 6434 this=column, 6435 drop=True, 6436 allow_null=True, 6437 ) 6438 if self._match_text_seq("SET", "NOT", "NULL"): 6439 return self.expression( 6440 exp.AlterColumn, 6441 this=column, 6442 allow_null=False, 6443 ) 6444 self._match_text_seq("SET", "DATA") 6445 self._match_text_seq("TYPE") 6446 return self.expression( 6447 exp.AlterColumn, 6448 this=column, 6449 dtype=self._parse_types(), 6450 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6451 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6452 ) 6453 6454 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6455 if self._match_texts(("ALL", "EVEN", "AUTO")): 6456 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6457 6458 self._match_text_seq("KEY", "DISTKEY") 6459 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6460 6461 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6462 if compound: 6463 self._match_text_seq("SORTKEY") 6464 6465 if self._match(TokenType.L_PAREN, advance=False): 6466 return self.expression( 6467 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6468 ) 6469 6470 self._match_texts(("AUTO", "NONE")) 6471 return self.expression( 6472 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6473 ) 6474 6475 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6476 index = self._index - 1 6477 6478 partition_exists = self._parse_exists() 6479 if self._match(TokenType.PARTITION, advance=False): 6480 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6481 6482 self._retreat(index) 6483 return self._parse_csv(self._parse_drop_column) 6484 6485 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6486 if self._match(TokenType.COLUMN): 6487 exists = self._parse_exists() 6488 old_column = self._parse_column() 6489 to = self._match_text_seq("TO") 6490 new_column = self._parse_column() 6491 6492 if old_column is None or to is None or new_column is None: 6493 return None 6494 6495 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6496 6497 self._match_text_seq("TO") 6498 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6499 6500 def _parse_alter_table_set(self) -> exp.AlterSet: 6501 alter_set = self.expression(exp.AlterSet) 6502 6503 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6504 "TABLE", "PROPERTIES" 6505 ): 6506 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6507 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6508 alter_set.set("expressions", [self._parse_assignment()]) 6509 elif self._match_texts(("LOGGED", "UNLOGGED")): 6510 alter_set.set("option", exp.var(self._prev.text.upper())) 6511 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6512 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6513 elif self._match_text_seq("LOCATION"): 6514 alter_set.set("location", self._parse_field()) 6515 elif self._match_text_seq("ACCESS", "METHOD"): 6516 alter_set.set("access_method", self._parse_field()) 6517 elif self._match_text_seq("TABLESPACE"): 6518 alter_set.set("tablespace", self._parse_field()) 6519 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6520 alter_set.set("file_format", [self._parse_field()]) 6521 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6522 alter_set.set("file_format", self._parse_wrapped_options()) 6523 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6524 alter_set.set("copy_options", self._parse_wrapped_options()) 6525 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6526 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6527 else: 6528 if self._match_text_seq("SERDE"): 6529 alter_set.set("serde", self._parse_field()) 6530 6531 alter_set.set("expressions", [self._parse_properties()]) 6532 6533 return 
alter_set 6534 6535 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6536 start = self._prev 6537 6538 if not self._match(TokenType.TABLE): 6539 return self._parse_as_command(start) 6540 6541 exists = self._parse_exists() 6542 only = self._match_text_seq("ONLY") 6543 this = self._parse_table(schema=True) 6544 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6545 6546 if self._next: 6547 self._advance() 6548 6549 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6550 if parser: 6551 actions = ensure_list(parser(self)) 6552 options = self._parse_csv(self._parse_property) 6553 6554 if not self._curr and actions: 6555 return self.expression( 6556 exp.AlterTable, 6557 this=this, 6558 exists=exists, 6559 actions=actions, 6560 only=only, 6561 options=options, 6562 cluster=cluster, 6563 ) 6564 6565 return self._parse_as_command(start) 6566 6567 def _parse_merge(self) -> exp.Merge: 6568 self._match(TokenType.INTO) 6569 target = self._parse_table() 6570 6571 if target and self._match(TokenType.ALIAS, advance=False): 6572 target.set("alias", self._parse_table_alias()) 6573 6574 self._match(TokenType.USING) 6575 using = self._parse_table() 6576 6577 self._match(TokenType.ON) 6578 on = self._parse_assignment() 6579 6580 return self.expression( 6581 exp.Merge, 6582 this=target, 6583 using=using, 6584 on=on, 6585 expressions=self._parse_when_matched(), 6586 ) 6587 6588 def _parse_when_matched(self) -> t.List[exp.When]: 6589 whens = [] 6590 6591 while self._match(TokenType.WHEN): 6592 matched = not self._match(TokenType.NOT) 6593 self._match_text_seq("MATCHED") 6594 source = ( 6595 False 6596 if self._match_text_seq("BY", "TARGET") 6597 else self._match_text_seq("BY", "SOURCE") 6598 ) 6599 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6600 6601 self._match(TokenType.THEN) 6602 6603 if self._match(TokenType.INSERT): 6604 _this = self._parse_star() 6605 if _this: 6606 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6607 else: 6608 then = self.expression( 6609 exp.Insert, 6610 this=self._parse_value(), 6611 expression=self._match_text_seq("VALUES") and self._parse_value(), 6612 ) 6613 elif self._match(TokenType.UPDATE): 6614 expressions = self._parse_star() 6615 if expressions: 6616 then = self.expression(exp.Update, expressions=expressions) 6617 else: 6618 then = self.expression( 6619 exp.Update, 6620 expressions=self._match(TokenType.SET) 6621 and self._parse_csv(self._parse_equality), 6622 ) 6623 elif self._match(TokenType.DELETE): 6624 then = self.expression(exp.Var, this=self._prev.text) 6625 else: 6626 then = None 6627 6628 whens.append( 6629 self.expression( 6630 exp.When, 6631 matched=matched, 6632 source=source, 6633 condition=condition, 6634 then=then, 6635 ) 6636 ) 6637 return whens 6638 6639 def _parse_show(self) -> t.Optional[exp.Expression]: 6640 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6641 if parser: 6642 return parser(self) 6643 return self._parse_as_command(self._prev) 6644 6645 def _parse_set_item_assignment( 6646 self, kind: t.Optional[str] = None 6647 ) -> t.Optional[exp.Expression]: 6648 index = self._index 6649 6650 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6651 return self._parse_set_transaction(global_=kind == "GLOBAL") 6652 6653 left = self._parse_primary() or self._parse_column() 6654 assignment_delimiter = self._match_texts(("=", "TO")) 6655 6656 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not 
assignment_delimiter): 6657 self._retreat(index) 6658 return None 6659 6660 right = self._parse_statement() or self._parse_id_var() 6661 if isinstance(right, (exp.Column, exp.Identifier)): 6662 right = exp.var(right.name) 6663 6664 this = self.expression(exp.EQ, this=left, expression=right) 6665 return self.expression(exp.SetItem, this=this, kind=kind) 6666 6667 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6668 self._match_text_seq("TRANSACTION") 6669 characteristics = self._parse_csv( 6670 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6671 ) 6672 return self.expression( 6673 exp.SetItem, 6674 expressions=characteristics, 6675 kind="TRANSACTION", 6676 **{"global": global_}, # type: ignore 6677 ) 6678 6679 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6680 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6681 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6682 6683 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6684 index = self._index 6685 set_ = self.expression( 6686 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6687 ) 6688 6689 if self._curr: 6690 self._retreat(index) 6691 return self._parse_as_command(self._prev) 6692 6693 return set_ 6694 6695 def _parse_var_from_options( 6696 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6697 ) -> t.Optional[exp.Var]: 6698 start = self._curr 6699 if not start: 6700 return None 6701 6702 option = start.text.upper() 6703 continuations = options.get(option) 6704 6705 index = self._index 6706 self._advance() 6707 for keywords in continuations or []: 6708 if isinstance(keywords, str): 6709 keywords = (keywords,) 6710 6711 if self._match_text_seq(*keywords): 6712 option = f"{option} {' '.join(keywords)}" 6713 break 6714 else: 6715 if continuations or continuations is None: 6716 if raise_unmatched: 6717 self.raise_error(f"Unknown option {option}") 6718 6719 self._retreat(index) 6720 return None 6721 6722 return exp.var(option) 6723 6724 def _parse_as_command(self, start: Token) -> exp.Command: 6725 while self._curr: 6726 self._advance() 6727 text = self._find_sql(start, self._prev) 6728 size = len(start.text) 6729 self._warn_unsupported() 6730 return exp.Command(this=text[:size], expression=text[size:]) 6731 6732 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6733 settings = [] 6734 6735 self._match_l_paren() 6736 kind = self._parse_id_var() 6737 6738 if self._match(TokenType.L_PAREN): 6739 while True: 6740 key = self._parse_id_var() 6741 value = self._parse_primary() 6742 6743 if not key and value is None: 6744 break 6745 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6746 self._match(TokenType.R_PAREN) 6747 6748 self._match_r_paren() 6749 6750 return self.expression( 6751 exp.DictProperty, 6752 this=this, 6753 kind=kind.this if kind else None, 6754 settings=settings, 6755 ) 6756 6757 def _parse_dict_range(self, this: str) -> exp.DictRange: 6758 self._match_l_paren() 6759 has_min = self._match_text_seq("MIN") 6760 if has_min: 6761 min = self._parse_var() or self._parse_primary() 6762 self._match_text_seq("MAX") 6763 max = self._parse_var() or self._parse_primary() 6764 else: 6765 max = self._parse_var() or self._parse_primary() 6766 min = exp.Literal.number(0) 6767 self._match_r_paren() 6768 return self.expression(exp.DictRange, this=this, min=min, max=max) 6769 6770 def _parse_comprehension( 6771 self, this: 
t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
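For illustration, a minimal sketch of constructing a parser by hand; the settings and the dialect name below are arbitrary examples, and any value accepted by Dialect.get_or_raise works for dialect:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect errors and log them instead of raising on the first one.
parser = Parser(error_level=ErrorLevel.WARN, max_errors=5, dialect="duckdb")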
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
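A minimal usage sketch, assuming the base Tokenizer and Parser (dialect subclasses work the same way); the query string is illustrative:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

# One syntax tree per statement; passing sql yields better error messages.
expressions = Parser().parse(tokens, sql)
print(expressions[0].sql())  # SELECT a FROM t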
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
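A hedged sketch of parsing straight into a target node type; it assumes exp.Select is registered in EXPRESSION_PARSERS, which is the case for the base parser:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1"
tokens = Tokenizer().tokenize(sql)

# Raises ParseError (with into_expression recorded) if no given type matches.
select = Parser().parse_into(exp.Select, tokens, sql)[0]
assert isinstance(select, exp.Select)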
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
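For illustration, how the error level changes what happens here; the malformed input is an assumption chosen to leave trailing tokens and thus record a recoverable error:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FORM t"  # "FORM" instead of "FROM", on purpose
tokens = Tokenizer().tokenize(sql)

# WARN: errors are logged via logger.error and a partial tree is returned.
Parser(error_level=ErrorLevel.WARN).parse(tokens, sql)

# RAISE: the collected errors are merged into a single ParseError.
try:
    Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)
except ParseError as e:
    print(len(e.errors))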
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
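The resulting ParseError carries the structured context assembled above. A small sketch (the typo in the query is deliberate):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FORM t"  # "FORM" instead of "FROM"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    first = e.errors[0]
    print(first["line"], first["col"], first["highlight"])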
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
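A sketch of the effect, using the public helper exp.column to build the child node; inside dialect parsers this is how most AST nodes are produced:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Builds exp.Not(this=exp.column("x")), attaches any pending comments, validates it.
node = parser.expression(exp.Not, this=exp.column("x"))
print(node.sql())  # NOT x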
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
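A sketch of the validation contract; it assumes exp.Cast marks both this and to as mandatory, which holds in current sqlglot:

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

incomplete = exp.Cast(this=exp.column("x"))  # mandatory "to" is missing

try:
    Parser().validate_expression(incomplete)  # default IMMEDIATE level raises right away
except ParseError as e:
    print(e.errors[0]["description"])

# With IGNORE, the node is returned without any checks.
node = Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)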