sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
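
# Illustrative usage sketch (not part of the upstream module): build_mod wraps
# binary operands in parentheses so that re-rendering MOD(..) with the %
# operator preserves precedence.
#
#     >>> this = exp.Add(this=exp.column("a"), expression=exp.Literal.number(1))
#     >>> build_mod([this, exp.Literal.number(7)]).sql()
#     '(a + 1) % 7'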

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }
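
    # Hypothetical sketch (not upstream code): dialect parsers customize behavior
    # by subclassing Parser and extending class-level tables like FUNCTIONS, e.g.:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    #         }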

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
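
    # Illustration (assumption about the default dialect, not upstream code):
    # keeping join keywords out of TABLE_ALIAS_TOKENS is what makes LEFT below
    # parse as a join side instead of as an alias for t:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SELECT * FROM t LEFT JOIN u ON t.id = u.id").sql()
    #     'SELECT * FROM t LEFT JOIN u ON t.id = u.id'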

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
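
    # Illustration (not upstream code): the CONJUNCTION/EQUALITY/COMPARISON/
    # BITWISE/TERM/FACTOR tables drive a precedence-climbing parse, so FACTOR
    # operators bind tighter than TERM operators:
    #
    #     >>> from sqlglot import parse_one
    #     >>> tree = parse_one("1 + 2 * 3")
    #     >>> isinstance(tree, exp.Add) and isinstance(tree.expression, exp.Mul)
    #     True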

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
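
    # Illustration (default dialect, not upstream code): TokenType.DCOLON above
    # is what turns Postgres-style casts into exp.Cast nodes:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("a::INT").sql()
    #     'CAST(a AS INT)'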

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
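
    # Illustration (not upstream code): _parse_statement dispatches on the first
    # token through this table:
    #
    #     >>> from sqlglot import parse_one
    #     >>> isinstance(parse_one("DROP TABLE t"), exp.Drop)
    #     True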

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
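
    # Illustration (not upstream code): range parsers receive the left-hand side
    # that was already parsed, e.g. BETWEEN:
    #
    #     >>> from sqlglot import parse_one
    #     >>> isinstance(parse_one("x BETWEEN 1 AND 10"), exp.Between)
    #     True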

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
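
    # Hedged sketch (output shape assumed, not upstream code): property parsers
    # fire while parsing DDL, e.g. the COMMENT entry above:
    #
    #     >>> from sqlglot import parse_one
    #     >>> ddl = parse_one("CREATE TABLE t (x INT) COMMENT='c'")
    #     >>> ddl.find(exp.SchemaCommentProperty) is not None
    #     True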

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
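
    # Illustration (not upstream code): column constraints round-trip through
    # these parsers:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)").sql()
    #     'CREATE TABLE t (x INT NOT NULL DEFAULT 0)'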
"PERIOD": lambda self: self._parse_period_for_system_time(), 936 "PRIMARY KEY": lambda self: self._parse_primary_key(), 937 "REFERENCES": lambda self: self._parse_references(match=False), 938 "TITLE": lambda self: self.expression( 939 exp.TitleColumnConstraint, this=self._parse_var_or_string() 940 ), 941 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 942 "UNIQUE": lambda self: self._parse_unique(), 943 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 944 "WITH": lambda self: self.expression( 945 exp.Properties, expressions=self._parse_wrapped_properties() 946 ), 947 } 948 949 ALTER_PARSERS = { 950 "ADD": lambda self: self._parse_alter_table_add(), 951 "ALTER": lambda self: self._parse_alter_table_alter(), 952 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 953 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 954 "DROP": lambda self: self._parse_alter_table_drop(), 955 "RENAME": lambda self: self._parse_alter_table_rename(), 956 "SET": lambda self: self._parse_alter_table_set(), 957 } 958 959 ALTER_ALTER_PARSERS = { 960 "DISTKEY": lambda self: self._parse_alter_diststyle(), 961 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 962 "SORTKEY": lambda self: self._parse_alter_sortkey(), 963 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 964 } 965 966 SCHEMA_UNNAMED_CONSTRAINTS = { 967 "CHECK", 968 "EXCLUDE", 969 "FOREIGN KEY", 970 "LIKE", 971 "PERIOD", 972 "PRIMARY KEY", 973 "UNIQUE", 974 } 975 976 NO_PAREN_FUNCTION_PARSERS = { 977 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 978 "CASE": lambda self: self._parse_case(), 979 "IF": lambda self: self._parse_if(), 980 "NEXT": lambda self: self._parse_next_value_for(), 981 } 982 983 INVALID_FUNC_NAME_TOKENS = { 984 TokenType.IDENTIFIER, 985 TokenType.STRING, 986 } 987 988 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 989 990 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 991 992 FUNCTION_PARSERS = { 993 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 994 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 995 "DECODE": lambda self: self._parse_decode(), 996 "EXTRACT": lambda self: self._parse_extract(), 997 "JSON_OBJECT": lambda self: self._parse_json_object(), 998 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 999 "JSON_TABLE": lambda self: self._parse_json_table(), 1000 "MATCH": lambda self: self._parse_match_against(), 1001 "OPENJSON": lambda self: self._parse_open_json(), 1002 "POSITION": lambda self: self._parse_position(), 1003 "PREDICT": lambda self: self._parse_predict(), 1004 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1005 "STRING_AGG": lambda self: self._parse_string_agg(), 1006 "SUBSTRING": lambda self: self._parse_substring(), 1007 "TRIM": lambda self: self._parse_trim(), 1008 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1009 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1010 } 1011 1012 QUERY_MODIFIER_PARSERS = { 1013 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1014 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1015 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1016 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1017 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1018 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1019 

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
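
    # Minimal usage sketch (not upstream code; in practice sqlglot.parse and
    # parse_one wrap this tokenize-then-parse flow):
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     >>> [e.sql() for e in Parser().parse(tokens)]
    #     ['SELECT 1', 'SELECT 2']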
1268 """ 1269 errors = [] 1270 for expression_type in ensure_list(expression_types): 1271 parser = self.EXPRESSION_PARSERS.get(expression_type) 1272 if not parser: 1273 raise TypeError(f"No parser registered for {expression_type}") 1274 1275 try: 1276 return self._parse(parser, raw_tokens, sql) 1277 except ParseError as e: 1278 e.errors[0]["into_expression"] = expression_type 1279 errors.append(e) 1280 1281 raise ParseError( 1282 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1283 errors=merge_errors(errors), 1284 ) from errors[-1] 1285 1286 def _parse( 1287 self, 1288 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1289 raw_tokens: t.List[Token], 1290 sql: t.Optional[str] = None, 1291 ) -> t.List[t.Optional[exp.Expression]]: 1292 self.reset() 1293 self.sql = sql or "" 1294 1295 total = len(raw_tokens) 1296 chunks: t.List[t.List[Token]] = [[]] 1297 1298 for i, token in enumerate(raw_tokens): 1299 if token.token_type == TokenType.SEMICOLON: 1300 if token.comments: 1301 chunks.append([token]) 1302 1303 if i < total - 1: 1304 chunks.append([]) 1305 else: 1306 chunks[-1].append(token) 1307 1308 expressions = [] 1309 1310 for tokens in chunks: 1311 self._index = -1 1312 self._tokens = tokens 1313 self._advance() 1314 1315 expressions.append(parse_method(self)) 1316 1317 if self._index < len(self._tokens): 1318 self.raise_error("Invalid expression / Unexpected token") 1319 1320 self.check_errors() 1321 1322 return expressions 1323 1324 def check_errors(self) -> None: 1325 """Logs or raises any found errors, depending on the chosen error level setting.""" 1326 if self.error_level == ErrorLevel.WARN: 1327 for error in self.errors: 1328 logger.error(str(error)) 1329 elif self.error_level == ErrorLevel.RAISE and self.errors: 1330 raise ParseError( 1331 concat_messages(self.errors, self.max_errors), 1332 errors=merge_errors(self.errors), 1333 ) 1334 1335 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1336 """ 1337 Appends an error in the list of recorded errors or raises it, depending on the chosen 1338 error level setting. 1339 """ 1340 token = token or self._curr or self._prev or Token.string("") 1341 start = token.start 1342 end = token.end + 1 1343 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1344 highlight = self.sql[start:end] 1345 end_context = self.sql[end : end + self.error_message_context] 1346 1347 error = ParseError.new( 1348 f"{message}. Line {token.line}, Col: {token.col}.\n" 1349 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1350 description=message, 1351 line=token.line, 1352 col=token.col, 1353 start_context=start_context, 1354 highlight=highlight, 1355 end_context=end_context, 1356 ) 1357 1358 if self.error_level == ErrorLevel.IMMEDIATE: 1359 raise error 1360 1361 self.errors.append(error) 1362 1363 def expression( 1364 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1365 ) -> E: 1366 """ 1367 Creates a new, validated Expression. 1368 1369 Args: 1370 exp_class: The expression class to instantiate. 1371 comments: An optional list of comments to attach to the expression. 1372 kwargs: The arguments to set for the expression along with their respective values. 1373 1374 Returns: 1375 The target expression. 
1376 """ 1377 instance = exp_class(**kwargs) 1378 instance.add_comments(comments) if comments else self._add_comments(instance) 1379 return self.validate_expression(instance) 1380 1381 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1382 if expression and self._prev_comments: 1383 expression.add_comments(self._prev_comments) 1384 self._prev_comments = None 1385 1386 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1387 """ 1388 Validates an Expression, making sure that all its mandatory arguments are set. 1389 1390 Args: 1391 expression: The expression to validate. 1392 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1393 1394 Returns: 1395 The validated expression. 1396 """ 1397 if self.error_level != ErrorLevel.IGNORE: 1398 for error_message in expression.error_messages(args): 1399 self.raise_error(error_message) 1400 1401 return expression 1402 1403 def _find_sql(self, start: Token, end: Token) -> str: 1404 return self.sql[start.start : end.end + 1] 1405 1406 def _is_connected(self) -> bool: 1407 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1408 1409 def _advance(self, times: int = 1) -> None: 1410 self._index += times 1411 self._curr = seq_get(self._tokens, self._index) 1412 self._next = seq_get(self._tokens, self._index + 1) 1413 1414 if self._index > 0: 1415 self._prev = self._tokens[self._index - 1] 1416 self._prev_comments = self._prev.comments 1417 else: 1418 self._prev = None 1419 self._prev_comments = None 1420 1421 def _retreat(self, index: int) -> None: 1422 if index != self._index: 1423 self._advance(index - self._index) 1424 1425 def _warn_unsupported(self) -> None: 1426 if len(self._tokens) <= 1: 1427 return 1428 1429 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1430 # interested in emitting a warning for the one being currently processed. 1431 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1432 1433 logger.warning( 1434 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1435 ) 1436 1437 def _parse_command(self) -> exp.Command: 1438 self._warn_unsupported() 1439 return self.expression( 1440 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1441 ) 1442 1443 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1444 """ 1445 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to solve
        this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
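
    # Illustration (not upstream code): _parse_exists backs the IF EXISTS flag
    # on statements such as DROP:
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #     True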
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1658 expression = self._parse_string() 1659 extend_props(self._parse_properties()) 1660 else: 1661 expression = self._parse_statement() 1662 1663 end = self._match_text_seq("END") 1664 1665 if return_: 1666 expression = self.expression(exp.Return, this=expression) 1667 elif create_token.token_type == TokenType.INDEX: 1668 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1669 if not self._match(TokenType.ON): 1670 index = self._parse_id_var() 1671 anonymous = False 1672 else: 1673 index = None 1674 anonymous = True 1675 1676 this = self._parse_index(index=index, anonymous=anonymous) 1677 elif create_token.token_type in self.DB_CREATABLES: 1678 table_parts = self._parse_table_parts( 1679 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1680 ) 1681 1682 # exp.Properties.Location.POST_NAME 1683 self._match(TokenType.COMMA) 1684 extend_props(self._parse_properties(before=True)) 1685 1686 this = self._parse_schema(this=table_parts) 1687 1688 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1689 extend_props(self._parse_properties()) 1690 1691 self._match(TokenType.ALIAS) 1692 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1693 # exp.Properties.Location.POST_ALIAS 1694 extend_props(self._parse_properties()) 1695 1696 if create_token.token_type == TokenType.SEQUENCE: 1697 expression = self._parse_types() 1698 extend_props(self._parse_properties()) 1699 else: 1700 expression = self._parse_ddl_select() 1701 1702 if create_token.token_type == TokenType.TABLE: 1703 # exp.Properties.Location.POST_EXPRESSION 1704 extend_props(self._parse_properties()) 1705 1706 indexes = [] 1707 while True: 1708 index = self._parse_index() 1709 1710 # exp.Properties.Location.POST_INDEX 1711 extend_props(self._parse_properties()) 1712 1713 if not index: 1714 break 1715 else: 1716 self._match(TokenType.COMMA) 1717 indexes.append(index) 1718 elif create_token.token_type == TokenType.VIEW: 1719 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1720 no_schema_binding = True 1721 1722 shallow = self._match_text_seq("SHALLOW") 1723 1724 if self._match_texts(self.CLONE_KEYWORDS): 1725 copy = self._prev.text.lower() == "copy" 1726 clone = self.expression( 1727 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1728 ) 1729 1730 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1731 return self._parse_as_command(start) 1732 1733 return self.expression( 1734 exp.Create, 1735 comments=comments, 1736 this=this, 1737 kind=create_token.text.upper(), 1738 replace=replace, 1739 unique=unique, 1740 expression=expression, 1741 exists=exists, 1742 properties=properties, 1743 indexes=indexes, 1744 no_schema_binding=no_schema_binding, 1745 begin=begin, 1746 end=end, 1747 clone=clone, 1748 ) 1749 1750 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1751 seq = exp.SequenceProperties() 1752 1753 options = [] 1754 index = self._index 1755 1756 while self._curr: 1757 self._match(TokenType.COMMA) 1758 if self._match_text_seq("INCREMENT"): 1759 self._match_text_seq("BY") 1760 self._match_text_seq("=") 1761 seq.set("increment", self._parse_term()) 1762 elif self._match_text_seq("MINVALUE"): 1763 seq.set("minvalue", self._parse_term()) 1764 elif self._match_text_seq("MAXVALUE"): 1765 seq.set("maxvalue", self._parse_term()) 1766
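# [Editor's note: illustrative addition, not part of the original source.] This
# loop walks sequence options one keyword at a time, so a statement such as
# CREATE SEQUENCE s INCREMENT BY 2 MINVALUE 0 START WITH 10 CACHE 5 fills the
# corresponding SequenceProperties args.
elif self._match(TokenType.START_WITH) or 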
self._match_text_seq("START"): 1767 self._match_text_seq("=") 1768 seq.set("start", self._parse_term()) 1769 elif self._match_text_seq("CACHE"): 1770 # T-SQL allows empty CACHE which is initialized dynamically 1771 seq.set("cache", self._parse_number() or True) 1772 elif self._match_text_seq("OWNED", "BY"): 1773 # "OWNED BY NONE" is the default 1774 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1775 else: 1776 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1777 if opt: 1778 options.append(opt) 1779 else: 1780 break 1781 1782 seq.set("options", options if options else None) 1783 return None if self._index == index else seq 1784 1785 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1786 # only used for teradata currently 1787 self._match(TokenType.COMMA) 1788 1789 kwargs = { 1790 "no": self._match_text_seq("NO"), 1791 "dual": self._match_text_seq("DUAL"), 1792 "before": self._match_text_seq("BEFORE"), 1793 "default": self._match_text_seq("DEFAULT"), 1794 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1795 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1796 "after": self._match_text_seq("AFTER"), 1797 "minimum": self._match_texts(("MIN", "MINIMUM")), 1798 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1799 } 1800 1801 if self._match_texts(self.PROPERTY_PARSERS): 1802 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1803 try: 1804 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1805 except TypeError: 1806 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1807 1808 return None 1809 1810 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1811 return self._parse_wrapped_csv(self._parse_property) 1812 1813 def _parse_property(self) -> t.Optional[exp.Expression]: 1814 if self._match_texts(self.PROPERTY_PARSERS): 1815 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1816 1817 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1818 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1819 1820 if self._match_text_seq("COMPOUND", "SORTKEY"): 1821 return self._parse_sortkey(compound=True) 1822 1823 if self._match_text_seq("SQL", "SECURITY"): 1824 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1825 1826 index = self._index 1827 key = self._parse_column() 1828 1829 if not self._match(TokenType.EQ): 1830 self._retreat(index) 1831 return self._parse_sequence_properties() 1832 1833 return self.expression( 1834 exp.Property, 1835 this=key.to_dot() if isinstance(key, exp.Column) else key, 1836 value=self._parse_bitwise() or self._parse_var(any_token=True), 1837 ) 1838 1839 def _parse_stored(self) -> exp.FileFormatProperty: 1840 self._match(TokenType.ALIAS) 1841 1842 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1843 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1844 1845 return self.expression( 1846 exp.FileFormatProperty, 1847 this=( 1848 self.expression( 1849 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1850 ) 1851 if input_format or output_format 1852 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1853 ), 1854 ) 1855 1856 def _parse_unquoted_field(self): 1857 field = self._parse_field() 1858 if isinstance(field, exp.Identifier) and not field.quoted: 1859 field = exp.var(field) 1860 1861 return field 1862 1863 def 
_parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1864 self._match(TokenType.EQ) 1865 self._match(TokenType.ALIAS) 1866 1867 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1868 1869 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1870 properties = [] 1871 while True: 1872 if before: 1873 prop = self._parse_property_before() 1874 else: 1875 prop = self._parse_property() 1876 if not prop: 1877 break 1878 for p in ensure_list(prop): 1879 properties.append(p) 1880 1881 if properties: 1882 return self.expression(exp.Properties, expressions=properties) 1883 1884 return None 1885 1886 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1887 return self.expression( 1888 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1889 ) 1890 1891 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1892 if self._index >= 2: 1893 pre_volatile_token = self._tokens[self._index - 2] 1894 else: 1895 pre_volatile_token = None 1896 1897 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1898 return exp.VolatileProperty() 1899 1900 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1901 1902 def _parse_retention_period(self) -> exp.Var: 1903 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1904 number = self._parse_number() 1905 number_str = f"{number} " if number else "" 1906 unit = self._parse_var(any_token=True) 1907 return exp.var(f"{number_str}{unit}") 1908 1909 def _parse_system_versioning_property( 1910 self, with_: bool = False 1911 ) -> exp.WithSystemVersioningProperty: 1912 self._match(TokenType.EQ) 1913 prop = self.expression( 1914 exp.WithSystemVersioningProperty, 1915 **{ # type: ignore 1916 "on": True, 1917 "with": with_, 1918 }, 1919 ) 1920 1921 if self._match_text_seq("OFF"): 1922 prop.set("on", False) 1923 return prop 1924 1925 self._match(TokenType.ON) 1926 if self._match(TokenType.L_PAREN): 1927 while self._curr and not self._match(TokenType.R_PAREN): 1928 if self._match_text_seq("HISTORY_TABLE", "="): 1929 prop.set("this", self._parse_table_parts()) 1930 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1931 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1932 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1933 prop.set("retention_period", self._parse_retention_period()) 1934 1935 self._match(TokenType.COMMA) 1936 1937 return prop 1938 1939 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1940 self._match(TokenType.EQ) 1941 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1942 prop = self.expression(exp.DataDeletionProperty, on=on) 1943 1944 if self._match(TokenType.L_PAREN): 1945 while self._curr and not self._match(TokenType.R_PAREN): 1946 if self._match_text_seq("FILTER_COLUMN", "="): 1947 prop.set("filter_column", self._parse_column()) 1948 elif self._match_text_seq("RETENTION_PERIOD", "="): 1949 prop.set("retention_period", self._parse_retention_period()) 1950 1951 self._match(TokenType.COMMA) 1952 1953 return prop 1954 1955 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1956 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1957 prop = self._parse_system_versioning_property(with_=True) 1958 self._match_r_paren() 1959 return prop 1960 1961 if self._match(TokenType.L_PAREN, advance=False): 
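# [Editor's note: illustrative addition, not part of the original source.] A
# parenthesized list after WITH, e.g. a T-SQL-style WITH (DATA_COMPRESSION = PAGE,
# FILLFACTOR = 90), is handed to _parse_wrapped_properties as key/value properties.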
1962 return self._parse_wrapped_properties() 1963 1964 if self._match_text_seq("JOURNAL"): 1965 return self._parse_withjournaltable() 1966 1967 if self._match_texts(self.VIEW_ATTRIBUTES): 1968 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1969 1970 if self._match_text_seq("DATA"): 1971 return self._parse_withdata(no=False) 1972 elif self._match_text_seq("NO", "DATA"): 1973 return self._parse_withdata(no=True) 1974 1975 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1976 return self._parse_serde_properties(with_=True) 1977 1978 if not self._next: 1979 return None 1980 1981 return self._parse_withisolatedloading() 1982 1983 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1984 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1985 self._match(TokenType.EQ) 1986 1987 user = self._parse_id_var() 1988 self._match(TokenType.PARAMETER) 1989 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1990 1991 if not user or not host: 1992 return None 1993 1994 return exp.DefinerProperty(this=f"{user}@{host}") 1995 1996 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1997 self._match(TokenType.TABLE) 1998 self._match(TokenType.EQ) 1999 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2000 2001 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2002 return self.expression(exp.LogProperty, no=no) 2003 2004 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2005 return self.expression(exp.JournalProperty, **kwargs) 2006 2007 def _parse_checksum(self) -> exp.ChecksumProperty: 2008 self._match(TokenType.EQ) 2009 2010 on = None 2011 if self._match(TokenType.ON): 2012 on = True 2013 elif self._match_text_seq("OFF"): 2014 on = False 2015 2016 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2017 2018 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2019 return self.expression( 2020 exp.Cluster, 2021 expressions=( 2022 self._parse_wrapped_csv(self._parse_ordered) 2023 if wrapped 2024 else self._parse_csv(self._parse_ordered) 2025 ), 2026 ) 2027 2028 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2029 self._match_text_seq("BY") 2030 2031 self._match_l_paren() 2032 expressions = self._parse_csv(self._parse_column) 2033 self._match_r_paren() 2034 2035 if self._match_text_seq("SORTED", "BY"): 2036 self._match_l_paren() 2037 sorted_by = self._parse_csv(self._parse_ordered) 2038 self._match_r_paren() 2039 else: 2040 sorted_by = None 2041 2042 self._match(TokenType.INTO) 2043 buckets = self._parse_number() 2044 self._match_text_seq("BUCKETS") 2045 2046 return self.expression( 2047 exp.ClusteredByProperty, 2048 expressions=expressions, 2049 sorted_by=sorted_by, 2050 buckets=buckets, 2051 ) 2052 2053 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2054 if not self._match_text_seq("GRANTS"): 2055 self._retreat(self._index - 1) 2056 return None 2057 2058 return self.expression(exp.CopyGrantsProperty) 2059 2060 def _parse_freespace(self) -> exp.FreespaceProperty: 2061 self._match(TokenType.EQ) 2062 return self.expression( 2063 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2064 ) 2065 2066 def _parse_mergeblockratio( 2067 self, no: bool = False, default: bool = False 2068 ) -> exp.MergeBlockRatioProperty: 2069 if self._match(TokenType.EQ): 2070 return self.expression( 2071 exp.MergeBlockRatioProperty, 2072 this=self._parse_number(), 
2073 percent=self._match(TokenType.PERCENT), 2074 ) 2075 2076 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2077 2078 def _parse_datablocksize( 2079 self, 2080 default: t.Optional[bool] = None, 2081 minimum: t.Optional[bool] = None, 2082 maximum: t.Optional[bool] = None, 2083 ) -> exp.DataBlocksizeProperty: 2084 self._match(TokenType.EQ) 2085 size = self._parse_number() 2086 2087 units = None 2088 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2089 units = self._prev.text 2090 2091 return self.expression( 2092 exp.DataBlocksizeProperty, 2093 size=size, 2094 units=units, 2095 default=default, 2096 minimum=minimum, 2097 maximum=maximum, 2098 ) 2099 2100 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2101 self._match(TokenType.EQ) 2102 always = self._match_text_seq("ALWAYS") 2103 manual = self._match_text_seq("MANUAL") 2104 never = self._match_text_seq("NEVER") 2105 default = self._match_text_seq("DEFAULT") 2106 2107 autotemp = None 2108 if self._match_text_seq("AUTOTEMP"): 2109 autotemp = self._parse_schema() 2110 2111 return self.expression( 2112 exp.BlockCompressionProperty, 2113 always=always, 2114 manual=manual, 2115 never=never, 2116 default=default, 2117 autotemp=autotemp, 2118 ) 2119 2120 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2121 index = self._index 2122 no = self._match_text_seq("NO") 2123 concurrent = self._match_text_seq("CONCURRENT") 2124 2125 if not self._match_text_seq("ISOLATED", "LOADING"): 2126 self._retreat(index) 2127 return None 2128 2129 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2130 return self.expression( 2131 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2132 ) 2133 2134 def _parse_locking(self) -> exp.LockingProperty: 2135 if self._match(TokenType.TABLE): 2136 kind = "TABLE" 2137 elif self._match(TokenType.VIEW): 2138 kind = "VIEW" 2139 elif self._match(TokenType.ROW): 2140 kind = "ROW" 2141 elif self._match_text_seq("DATABASE"): 2142 kind = "DATABASE" 2143 else: 2144 kind = None 2145 2146 if kind in ("DATABASE", "TABLE", "VIEW"): 2147 this = self._parse_table_parts() 2148 else: 2149 this = None 2150 2151 if self._match(TokenType.FOR): 2152 for_or_in = "FOR" 2153 elif self._match(TokenType.IN): 2154 for_or_in = "IN" 2155 else: 2156 for_or_in = None 2157 2158 if self._match_text_seq("ACCESS"): 2159 lock_type = "ACCESS" 2160 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2161 lock_type = "EXCLUSIVE" 2162 elif self._match_text_seq("SHARE"): 2163 lock_type = "SHARE" 2164 elif self._match_text_seq("READ"): 2165 lock_type = "READ" 2166 elif self._match_text_seq("WRITE"): 2167 lock_type = "WRITE" 2168 elif self._match_text_seq("CHECKSUM"): 2169 lock_type = "CHECKSUM" 2170 else: 2171 lock_type = None 2172 2173 override = self._match_text_seq("OVERRIDE") 2174 2175 return self.expression( 2176 exp.LockingProperty, 2177 this=this, 2178 kind=kind, 2179 for_or_in=for_or_in, 2180 lock_type=lock_type, 2181 override=override, 2182 ) 2183 2184 def _parse_partition_by(self) -> t.List[exp.Expression]: 2185 if self._match(TokenType.PARTITION_BY): 2186 return self._parse_csv(self._parse_conjunction) 2187 return [] 2188 2189 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2190 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2191 if self._match_text_seq("MINVALUE"): 2192 return exp.var("MINVALUE") 2193 if self._match_text_seq("MAXVALUE"): 2194 return exp.var("MAXVALUE") 2195 
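# [Editor's note: illustrative addition, not part of the original source.]
# Anything other than the MINVALUE/MAXVALUE keywords falls through to a regular
# expression parse, so a Postgres bound like FOR VALUES FROM (MINVALUE) TO (100)
# mixes both paths.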
return self._parse_bitwise() 2196 2197 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2198 expression = None 2199 from_expressions = None 2200 to_expressions = None 2201 2202 if self._match(TokenType.IN): 2203 this = self._parse_wrapped_csv(self._parse_bitwise) 2204 elif self._match(TokenType.FROM): 2205 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2206 self._match_text_seq("TO") 2207 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2208 elif self._match_text_seq("WITH", "(", "MODULUS"): 2209 this = self._parse_number() 2210 self._match_text_seq(",", "REMAINDER") 2211 expression = self._parse_number() 2212 self._match_r_paren() 2213 else: 2214 self.raise_error("Failed to parse partition bound spec.") 2215 2216 return self.expression( 2217 exp.PartitionBoundSpec, 2218 this=this, 2219 expression=expression, 2220 from_expressions=from_expressions, 2221 to_expressions=to_expressions, 2222 ) 2223 2224 # https://www.postgresql.org/docs/current/sql-createtable.html 2225 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2226 if not self._match_text_seq("OF"): 2227 self._retreat(self._index - 1) 2228 return None 2229 2230 this = self._parse_table(schema=True) 2231 2232 if self._match(TokenType.DEFAULT): 2233 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2234 elif self._match_text_seq("FOR", "VALUES"): 2235 expression = self._parse_partition_bound_spec() 2236 else: 2237 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2238 2239 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2240 2241 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2242 self._match(TokenType.EQ) 2243 return self.expression( 2244 exp.PartitionedByProperty, 2245 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2246 ) 2247 2248 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2249 if self._match_text_seq("AND", "STATISTICS"): 2250 statistics = True 2251 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2252 statistics = False 2253 else: 2254 statistics = None 2255 2256 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2257 2258 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2259 if self._match_text_seq("SQL"): 2260 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2261 return None 2262 2263 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2264 if self._match_text_seq("SQL", "DATA"): 2265 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2266 return None 2267 2268 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2269 if self._match_text_seq("PRIMARY", "INDEX"): 2270 return exp.NoPrimaryIndexProperty() 2271 if self._match_text_seq("SQL"): 2272 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2273 return None 2274 2275 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2276 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2277 return exp.OnCommitProperty() 2278 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2279 return exp.OnCommitProperty(delete=True) 2280 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2281 2282 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2283 if self._match_text_seq("SQL", "DATA"): 2284 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2285 
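# [Editor's note: illustrative addition, not part of the original source.] The
# READS SQL DATA branch above mirrors its siblings: _parse_contains_property,
# _parse_modifies_property and _parse_no_property cover CONTAINS SQL,
# MODIFIES SQL DATA and NO SQL, all as SqlReadWriteProperty nodes.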
return None 2286 2287 def _parse_distkey(self) -> exp.DistKeyProperty: 2288 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2289 2290 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2291 table = self._parse_table(schema=True) 2292 2293 options = [] 2294 while self._match_texts(("INCLUDING", "EXCLUDING")): 2295 this = self._prev.text.upper() 2296 2297 id_var = self._parse_id_var() 2298 if not id_var: 2299 return None 2300 2301 options.append( 2302 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2303 ) 2304 2305 return self.expression(exp.LikeProperty, this=table, expressions=options) 2306 2307 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2308 return self.expression( 2309 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2310 ) 2311 2312 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2313 self._match(TokenType.EQ) 2314 return self.expression( 2315 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2316 ) 2317 2318 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2319 self._match_text_seq("WITH", "CONNECTION") 2320 return self.expression( 2321 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2322 ) 2323 2324 def _parse_returns(self) -> exp.ReturnsProperty: 2325 value: t.Optional[exp.Expression] 2326 null = None 2327 is_table = self._match(TokenType.TABLE) 2328 2329 if is_table: 2330 if self._match(TokenType.LT): 2331 value = self.expression( 2332 exp.Schema, 2333 this="TABLE", 2334 expressions=self._parse_csv(self._parse_struct_types), 2335 ) 2336 if not self._match(TokenType.GT): 2337 self.raise_error("Expecting >") 2338 else: 2339 value = self._parse_schema(exp.var("TABLE")) 2340 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2341 null = True 2342 value = None 2343 else: 2344 value = self._parse_types() 2345 2346 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2347 2348 def _parse_describe(self) -> exp.Describe: 2349 kind = self._match_set(self.CREATABLES) and self._prev.text 2350 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2351 if self._match(TokenType.DOT): 2352 style = None 2353 self._retreat(self._index - 2) 2354 this = self._parse_table(schema=True) 2355 properties = self._parse_properties() 2356 expressions = properties.expressions if properties else None 2357 return self.expression( 2358 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2359 ) 2360 2361 def _parse_insert(self) -> exp.Insert: 2362 comments = ensure_list(self._prev_comments) 2363 hint = self._parse_hint() 2364 overwrite = self._match(TokenType.OVERWRITE) 2365 ignore = self._match(TokenType.IGNORE) 2366 local = self._match_text_seq("LOCAL") 2367 alternative = None 2368 is_function = None 2369 2370 if self._match_text_seq("DIRECTORY"): 2371 this: t.Optional[exp.Expression] = self.expression( 2372 exp.Directory, 2373 this=self._parse_var_or_string(), 2374 local=local, 2375 row_format=self._parse_row_format(match_row=True), 2376 ) 2377 else: 2378 if self._match(TokenType.OR): 2379 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2380 2381 self._match(TokenType.INTO) 2382 comments += ensure_list(self._prev_comments) 2383 self._match(TokenType.TABLE) 2384 is_function = self._match(TokenType.FUNCTION) 2385 2386 this = ( 2387 
self._parse_table(schema=True, parse_partition=True) 2388 if not is_function 2389 else self._parse_function() 2390 ) 2391 2392 returning = self._parse_returning() 2393 2394 return self.expression( 2395 exp.Insert, 2396 comments=comments, 2397 hint=hint, 2398 is_function=is_function, 2399 this=this, 2400 stored=self._match_text_seq("STORED") and self._parse_stored(), 2401 by_name=self._match_text_seq("BY", "NAME"), 2402 exists=self._parse_exists(), 2403 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2404 and self._parse_conjunction(), 2405 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2406 conflict=self._parse_on_conflict(), 2407 returning=returning or self._parse_returning(), 2408 overwrite=overwrite, 2409 alternative=alternative, 2410 ignore=ignore, 2411 ) 2412 2413 def _parse_kill(self) -> exp.Kill: 2414 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2415 2416 return self.expression( 2417 exp.Kill, 2418 this=self._parse_primary(), 2419 kind=kind, 2420 ) 2421 2422 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2423 conflict = self._match_text_seq("ON", "CONFLICT") 2424 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2425 2426 if not conflict and not duplicate: 2427 return None 2428 2429 conflict_keys = None 2430 constraint = None 2431 2432 if conflict: 2433 if self._match_text_seq("ON", "CONSTRAINT"): 2434 constraint = self._parse_id_var() 2435 elif self._match(TokenType.L_PAREN): 2436 conflict_keys = self._parse_csv(self._parse_id_var) 2437 self._match_r_paren() 2438 2439 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2440 if self._prev.token_type == TokenType.UPDATE: 2441 self._match(TokenType.SET) 2442 expressions = self._parse_csv(self._parse_equality) 2443 else: 2444 expressions = None 2445 2446 return self.expression( 2447 exp.OnConflict, 2448 duplicate=duplicate, 2449 expressions=expressions, 2450 action=action, 2451 conflict_keys=conflict_keys, 2452 constraint=constraint, 2453 ) 2454 2455 def _parse_returning(self) -> t.Optional[exp.Returning]: 2456 if not self._match(TokenType.RETURNING): 2457 return None 2458 return self.expression( 2459 exp.Returning, 2460 expressions=self._parse_csv(self._parse_expression), 2461 into=self._match(TokenType.INTO) and self._parse_table_part(), 2462 ) 2463 2464 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2465 if not self._match(TokenType.FORMAT): 2466 return None 2467 return self._parse_row_format() 2468 2469 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2470 index = self._index 2471 with_ = with_ or self._match_text_seq("WITH") 2472 2473 if not self._match(TokenType.SERDE_PROPERTIES): 2474 self._retreat(index) 2475 return None 2476 return self.expression( 2477 exp.SerdeProperties, 2478 **{ # type: ignore 2479 "expressions": self._parse_wrapped_properties(), 2480 "with": with_, 2481 }, 2482 ) 2483 2484 def _parse_row_format( 2485 self, match_row: bool = False 2486 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2487 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2488 return None 2489 2490 if self._match_text_seq("SERDE"): 2491 this = self._parse_string() 2492 2493 serde_properties = self._parse_serde_properties() 2494 2495 return self.expression( 2496 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2497 ) 2498 2499 self._match_text_seq("DELIMITED") 2500 
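# [Editor's note: illustrative addition, not part of the original source.] For
# Hive-style DDL such as ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES
# TERMINATED BY '\n', each TERMINATED BY / DEFINED AS string below lands in
# kwargs and is wrapped in a RowFormatDelimitedProperty.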
2501 kwargs = {} 2502 2503 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2504 kwargs["fields"] = self._parse_string() 2505 if self._match_text_seq("ESCAPED", "BY"): 2506 kwargs["escaped"] = self._parse_string() 2507 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2508 kwargs["collection_items"] = self._parse_string() 2509 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2510 kwargs["map_keys"] = self._parse_string() 2511 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2512 kwargs["lines"] = self._parse_string() 2513 if self._match_text_seq("NULL", "DEFINED", "AS"): 2514 kwargs["null"] = self._parse_string() 2515 2516 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2517 2518 def _parse_load(self) -> exp.LoadData | exp.Command: 2519 if self._match_text_seq("DATA"): 2520 local = self._match_text_seq("LOCAL") 2521 self._match_text_seq("INPATH") 2522 inpath = self._parse_string() 2523 overwrite = self._match(TokenType.OVERWRITE) 2524 self._match_pair(TokenType.INTO, TokenType.TABLE) 2525 2526 return self.expression( 2527 exp.LoadData, 2528 this=self._parse_table(schema=True), 2529 local=local, 2530 overwrite=overwrite, 2531 inpath=inpath, 2532 partition=self._parse_partition(), 2533 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2534 serde=self._match_text_seq("SERDE") and self._parse_string(), 2535 ) 2536 return self._parse_as_command(self._prev) 2537 2538 def _parse_delete(self) -> exp.Delete: 2539 # This handles MySQL's "Multiple-Table Syntax" 2540 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2541 tables = None 2542 comments = self._prev_comments 2543 if not self._match(TokenType.FROM, advance=False): 2544 tables = self._parse_csv(self._parse_table) or None 2545 2546 returning = self._parse_returning() 2547 2548 return self.expression( 2549 exp.Delete, 2550 comments=comments, 2551 tables=tables, 2552 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2553 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2554 where=self._parse_where(), 2555 returning=returning or self._parse_returning(), 2556 limit=self._parse_limit(), 2557 ) 2558 2559 def _parse_update(self) -> exp.Update: 2560 comments = self._prev_comments 2561 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2562 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2563 returning = self._parse_returning() 2564 return self.expression( 2565 exp.Update, 2566 comments=comments, 2567 **{ # type: ignore 2568 "this": this, 2569 "expressions": expressions, 2570 "from": self._parse_from(joins=True), 2571 "where": self._parse_where(), 2572 "returning": returning or self._parse_returning(), 2573 "order": self._parse_order(), 2574 "limit": self._parse_limit(), 2575 }, 2576 ) 2577 2578 def _parse_uncache(self) -> exp.Uncache: 2579 if not self._match(TokenType.TABLE): 2580 self.raise_error("Expecting TABLE after UNCACHE") 2581 2582 return self.expression( 2583 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2584 ) 2585 2586 def _parse_cache(self) -> exp.Cache: 2587 lazy = self._match_text_seq("LAZY") 2588 self._match(TokenType.TABLE) 2589 table = self._parse_table(schema=True) 2590 2591 options = [] 2592 if self._match_text_seq("OPTIONS"): 2593 self._match_l_paren() 2594 k = self._parse_string() 2595 self._match(TokenType.EQ) 2596 v = self._parse_string() 2597 options = [k, v] 2598 self._match_r_paren() 2599 2600 
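# [Editor's note: illustrative addition, not part of the original source.] At
# this point a Spark-style CACHE LAZY TABLE t OPTIONS('storageLevel' = 'DISK_ONLY')
# AS SELECT * FROM src has consumed its OPTIONS pair; the optional AS and the
# trailing SELECT are parsed next.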
self._match(TokenType.ALIAS) 2601 return self.expression( 2602 exp.Cache, 2603 this=table, 2604 lazy=lazy, 2605 options=options, 2606 expression=self._parse_select(nested=True), 2607 ) 2608 2609 def _parse_partition(self) -> t.Optional[exp.Partition]: 2610 if not self._match(TokenType.PARTITION): 2611 return None 2612 2613 return self.expression( 2614 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2615 ) 2616 2617 def _parse_value(self) -> t.Optional[exp.Tuple]: 2618 if self._match(TokenType.L_PAREN): 2619 expressions = self._parse_csv(self._parse_expression) 2620 self._match_r_paren() 2621 return self.expression(exp.Tuple, expressions=expressions) 2622 2623 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2624 expression = self._parse_expression() 2625 if expression: 2626 return self.expression(exp.Tuple, expressions=[expression]) 2627 return None 2628 2629 def _parse_projections(self) -> t.List[exp.Expression]: 2630 return self._parse_expressions() 2631 2632 def _parse_select( 2633 self, 2634 nested: bool = False, 2635 table: bool = False, 2636 parse_subquery_alias: bool = True, 2637 parse_set_operation: bool = True, 2638 ) -> t.Optional[exp.Expression]: 2639 cte = self._parse_with() 2640 2641 if cte: 2642 this = self._parse_statement() 2643 2644 if not this: 2645 self.raise_error("Failed to parse any statement following CTE") 2646 return cte 2647 2648 if "with" in this.arg_types: 2649 this.set("with", cte) 2650 else: 2651 self.raise_error(f"{this.key} does not support CTE") 2652 this = cte 2653 2654 return this 2655 2656 # duckdb supports leading with FROM x 2657 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2658 2659 if self._match(TokenType.SELECT): 2660 comments = self._prev_comments 2661 2662 hint = self._parse_hint() 2663 all_ = self._match(TokenType.ALL) 2664 distinct = self._match_set(self.DISTINCT_TOKENS) 2665 2666 kind = ( 2667 self._match(TokenType.ALIAS) 2668 and self._match_texts(("STRUCT", "VALUE")) 2669 and self._prev.text.upper() 2670 ) 2671 2672 if distinct: 2673 distinct = self.expression( 2674 exp.Distinct, 2675 on=self._parse_value() if self._match(TokenType.ON) else None, 2676 ) 2677 2678 if all_ and distinct: 2679 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2680 2681 limit = self._parse_limit(top=True) 2682 projections = self._parse_projections() 2683 2684 this = self.expression( 2685 exp.Select, 2686 kind=kind, 2687 hint=hint, 2688 distinct=distinct, 2689 expressions=projections, 2690 limit=limit, 2691 ) 2692 this.comments = comments 2693 2694 into = self._parse_into() 2695 if into: 2696 this.set("into", into) 2697 2698 if not from_: 2699 from_ = self._parse_from() 2700 2701 if from_: 2702 this.set("from", from_) 2703 2704 this = self._parse_query_modifiers(this) 2705 elif (table or nested) and self._match(TokenType.L_PAREN): 2706 if self._match(TokenType.PIVOT): 2707 this = self._parse_simplified_pivot() 2708 elif self._match(TokenType.FROM): 2709 this = exp.select("*").from_( 2710 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2711 ) 2712 else: 2713 this = ( 2714 self._parse_table() 2715 if table 2716 else self._parse_select(nested=True, parse_set_operation=False) 2717 ) 2718 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2719 2720 self._match_r_paren() 2721 2722 # We return early here so that the UNION isn't attached to the subquery by the 2723 # following call to _parse_set_operations, but instead becomes the 
parent node 2724 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2725 elif self._match(TokenType.VALUES, advance=False): 2726 this = self._parse_derived_table_values() 2727 elif from_: 2728 this = exp.select("*").from_(from_.this, copy=False) 2729 else: 2730 this = None 2731 2732 if parse_set_operation: 2733 return self._parse_set_operations(this) 2734 return this 2735 2736 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2737 if not skip_with_token and not self._match(TokenType.WITH): 2738 return None 2739 2740 comments = self._prev_comments 2741 recursive = self._match(TokenType.RECURSIVE) 2742 2743 expressions = [] 2744 while True: 2745 expressions.append(self._parse_cte()) 2746 2747 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2748 break 2749 else: 2750 self._match(TokenType.WITH) 2751 2752 return self.expression( 2753 exp.With, comments=comments, expressions=expressions, recursive=recursive 2754 ) 2755 2756 def _parse_cte(self) -> exp.CTE: 2757 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2758 if not alias or not alias.this: 2759 self.raise_error("Expected CTE to have alias") 2760 2761 self._match(TokenType.ALIAS) 2762 2763 if self._match_text_seq("NOT", "MATERIALIZED"): 2764 materialized = False 2765 elif self._match_text_seq("MATERIALIZED"): 2766 materialized = True 2767 else: 2768 materialized = None 2769 2770 return self.expression( 2771 exp.CTE, 2772 this=self._parse_wrapped(self._parse_statement), 2773 alias=alias, 2774 materialized=materialized, 2775 ) 2776 2777 def _parse_table_alias( 2778 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2779 ) -> t.Optional[exp.TableAlias]: 2780 any_token = self._match(TokenType.ALIAS) 2781 alias = ( 2782 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2783 or self._parse_string_as_identifier() 2784 ) 2785 2786 index = self._index 2787 if self._match(TokenType.L_PAREN): 2788 columns = self._parse_csv(self._parse_function_parameter) 2789 self._match_r_paren() if columns else self._retreat(index) 2790 else: 2791 columns = None 2792 2793 if not alias and not columns: 2794 return None 2795 2796 return self.expression(exp.TableAlias, this=alias, columns=columns) 2797 2798 def _parse_subquery( 2799 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2800 ) -> t.Optional[exp.Subquery]: 2801 if not this: 2802 return None 2803 2804 return self.expression( 2805 exp.Subquery, 2806 this=this, 2807 pivots=self._parse_pivots(), 2808 alias=self._parse_table_alias() if parse_alias else None, 2809 ) 2810 2811 def _implicit_unnests_to_explicit(self, this: E) -> E: 2812 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2813 2814 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2815 for i, join in enumerate(this.args.get("joins") or []): 2816 table = join.this 2817 normalized_table = table.copy() 2818 normalized_table.meta["maybe_column"] = True 2819 normalized_table = _norm(normalized_table, dialect=self.dialect) 2820 2821 if isinstance(table, exp.Table) and not join.args.get("on"): 2822 if normalized_table.parts[0].name in refs: 2823 table_as_column = table.to_column() 2824 unnest = exp.Unnest(expressions=[table_as_column]) 2825 2826 # Table.to_column creates a parent Alias node that we want to convert to 2827 # a TableAlias and attach to the Unnest, so it matches the parser's output 2828 if isinstance(table.args.get("alias"), exp.TableAlias): 
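# [Editor's note: illustrative addition, not part of the original source.] E.g.
# BigQuery's implicit form FROM t, t.arr AS a is rewritten here so the join
# target becomes UNNEST(t.arr) AS a, matching an explicit UNNEST's parse tree.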
2829 table_as_column.replace(table_as_column.this) 2830 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2831 2832 table.replace(unnest) 2833 2834 refs.add(normalized_table.alias_or_name) 2835 2836 return this 2837 2838 def _parse_query_modifiers( 2839 self, this: t.Optional[exp.Expression] 2840 ) -> t.Optional[exp.Expression]: 2841 if isinstance(this, (exp.Query, exp.Table)): 2842 for join in self._parse_joins(): 2843 this.append("joins", join) 2844 for lateral in iter(self._parse_lateral, None): 2845 this.append("laterals", lateral) 2846 2847 while True: 2848 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2849 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2850 key, expression = parser(self) 2851 2852 if expression: 2853 this.set(key, expression) 2854 if key == "limit": 2855 offset = expression.args.pop("offset", None) 2856 2857 if offset: 2858 offset = exp.Offset(expression=offset) 2859 this.set("offset", offset) 2860 2861 limit_by_expressions = expression.expressions 2862 expression.set("expressions", None) 2863 offset.set("expressions", limit_by_expressions) 2864 continue 2865 break 2866 2867 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2868 this = self._implicit_unnests_to_explicit(this) 2869 2870 return this 2871 2872 def _parse_hint(self) -> t.Optional[exp.Hint]: 2873 if self._match(TokenType.HINT): 2874 hints = [] 2875 for hint in iter( 2876 lambda: self._parse_csv( 2877 lambda: self._parse_function() or self._parse_var(upper=True) 2878 ), 2879 [], 2880 ): 2881 hints.extend(hint) 2882 2883 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2884 self.raise_error("Expected */ after HINT") 2885 2886 return self.expression(exp.Hint, expressions=hints) 2887 2888 return None 2889 2890 def _parse_into(self) -> t.Optional[exp.Into]: 2891 if not self._match(TokenType.INTO): 2892 return None 2893 2894 temp = self._match(TokenType.TEMPORARY) 2895 unlogged = self._match_text_seq("UNLOGGED") 2896 self._match(TokenType.TABLE) 2897 2898 return self.expression( 2899 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2900 ) 2901 2902 def _parse_from( 2903 self, joins: bool = False, skip_from_token: bool = False 2904 ) -> t.Optional[exp.From]: 2905 if not skip_from_token and not self._match(TokenType.FROM): 2906 return None 2907 2908 return self.expression( 2909 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2910 ) 2911 2912 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2913 return self.expression( 2914 exp.MatchRecognizeMeasure, 2915 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2916 this=self._parse_expression(), 2917 ) 2918 2919 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2920 if not self._match(TokenType.MATCH_RECOGNIZE): 2921 return None 2922 2923 self._match_l_paren() 2924 2925 partition = self._parse_partition_by() 2926 order = self._parse_order() 2927 2928 measures = ( 2929 self._parse_csv(self._parse_match_recognize_measure) 2930 if self._match_text_seq("MEASURES") 2931 else None 2932 ) 2933 2934 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2935 rows = exp.var("ONE ROW PER MATCH") 2936 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2937 text = "ALL ROWS PER MATCH" 2938 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2939 text += " SHOW EMPTY MATCHES" 2940 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2941 text += " OMIT EMPTY MATCHES" 
2942 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2943 text += " WITH UNMATCHED ROWS" 2944 rows = exp.var(text) 2945 else: 2946 rows = None 2947 2948 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2949 text = "AFTER MATCH SKIP" 2950 if self._match_text_seq("PAST", "LAST", "ROW"): 2951 text += " PAST LAST ROW" 2952 elif self._match_text_seq("TO", "NEXT", "ROW"): 2953 text += " TO NEXT ROW" 2954 elif self._match_text_seq("TO", "FIRST"): 2955 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2956 elif self._match_text_seq("TO", "LAST"): 2957 text += f" TO LAST {self._advance_any().text}" # type: ignore 2958 after = exp.var(text) 2959 else: 2960 after = None 2961 2962 if self._match_text_seq("PATTERN"): 2963 self._match_l_paren() 2964 2965 if not self._curr: 2966 self.raise_error("Expecting )", self._curr) 2967 2968 paren = 1 2969 start = self._curr 2970 2971 while self._curr and paren > 0: 2972 if self._curr.token_type == TokenType.L_PAREN: 2973 paren += 1 2974 if self._curr.token_type == TokenType.R_PAREN: 2975 paren -= 1 2976 2977 end = self._prev 2978 self._advance() 2979 2980 if paren > 0: 2981 self.raise_error("Expecting )", self._curr) 2982 2983 pattern = exp.var(self._find_sql(start, end)) 2984 else: 2985 pattern = None 2986 2987 define = ( 2988 self._parse_csv(self._parse_name_as_expression) 2989 if self._match_text_seq("DEFINE") 2990 else None 2991 ) 2992 2993 self._match_r_paren() 2994 2995 return self.expression( 2996 exp.MatchRecognize, 2997 partition_by=partition, 2998 order=order, 2999 measures=measures, 3000 rows=rows, 3001 after=after, 3002 pattern=pattern, 3003 define=define, 3004 alias=self._parse_table_alias(), 3005 ) 3006 3007 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3008 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3009 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3010 cross_apply = False 3011 3012 if cross_apply is not None: 3013 this = self._parse_select(table=True) 3014 view = None 3015 outer = None 3016 elif self._match(TokenType.LATERAL): 3017 this = self._parse_select(table=True) 3018 view = self._match(TokenType.VIEW) 3019 outer = self._match(TokenType.OUTER) 3020 else: 3021 return None 3022 3023 if not this: 3024 this = ( 3025 self._parse_unnest() 3026 or self._parse_function() 3027 or self._parse_id_var(any_token=False) 3028 ) 3029 3030 while self._match(TokenType.DOT): 3031 this = exp.Dot( 3032 this=this, 3033 expression=self._parse_function() or self._parse_id_var(any_token=False), 3034 ) 3035 3036 if view: 3037 table = self._parse_id_var(any_token=False) 3038 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3039 table_alias: t.Optional[exp.TableAlias] = self.expression( 3040 exp.TableAlias, this=table, columns=columns 3041 ) 3042 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3043 # We move the alias from the lateral's child node to the lateral itself 3044 table_alias = this.args["alias"].pop() 3045 else: 3046 table_alias = self._parse_table_alias() 3047 3048 return self.expression( 3049 exp.Lateral, 3050 this=this, 3051 view=view, 3052 outer=outer, 3053 alias=table_alias, 3054 cross_apply=cross_apply, 3055 ) 3056 3057 def _parse_join_parts( 3058 self, 3059 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3060 return ( 3061 self._match_set(self.JOIN_METHODS) and self._prev, 3062 self._match_set(self.JOIN_SIDES) and self._prev, 3063 self._match_set(self.JOIN_KINDS) and self._prev, 3064 ) 3065 
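# [Editor's note: illustrative addition, not part of the original source.] For a
# query like SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id, _parse_join_parts
# returns side=LEFT and kind=OUTER (method stays None); _parse_join below then
# consumes the JOIN token, the joined table and the ON condition.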
3066 def _parse_join( 3067 self, skip_join_token: bool = False, parse_bracket: bool = False 3068 ) -> t.Optional[exp.Join]: 3069 if self._match(TokenType.COMMA): 3070 return self.expression(exp.Join, this=self._parse_table()) 3071 3072 index = self._index 3073 method, side, kind = self._parse_join_parts() 3074 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3075 join = self._match(TokenType.JOIN) 3076 3077 if not skip_join_token and not join: 3078 self._retreat(index) 3079 kind = None 3080 method = None 3081 side = None 3082 3083 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3084 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3085 3086 if not skip_join_token and not join and not outer_apply and not cross_apply: 3087 return None 3088 3089 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3090 3091 if method: 3092 kwargs["method"] = method.text 3093 if side: 3094 kwargs["side"] = side.text 3095 if kind: 3096 kwargs["kind"] = kind.text 3097 if hint: 3098 kwargs["hint"] = hint 3099 3100 if self._match(TokenType.MATCH_CONDITION): 3101 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3102 3103 if self._match(TokenType.ON): 3104 kwargs["on"] = self._parse_conjunction() 3105 elif self._match(TokenType.USING): 3106 kwargs["using"] = self._parse_wrapped_id_vars() 3107 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3108 kind and kind.token_type == TokenType.CROSS 3109 ): 3110 index = self._index 3111 joins: t.Optional[list] = list(self._parse_joins()) 3112 3113 if joins and self._match(TokenType.ON): 3114 kwargs["on"] = self._parse_conjunction() 3115 elif joins and self._match(TokenType.USING): 3116 kwargs["using"] = self._parse_wrapped_id_vars() 3117 else: 3118 joins = None 3119 self._retreat(index) 3120 3121 kwargs["this"].set("joins", joins if joins else None) 3122 3123 comments = [c for token in (method, side, kind) if token for c in token.comments] 3124 return self.expression(exp.Join, comments=comments, **kwargs) 3125 3126 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3127 this = self._parse_conjunction() 3128 3129 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3130 return this 3131 3132 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3133 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3134 3135 return this 3136 3137 def _parse_index_params(self) -> exp.IndexParameters: 3138 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3139 3140 if self._match(TokenType.L_PAREN, advance=False): 3141 columns = self._parse_wrapped_csv(self._parse_with_operator) 3142 else: 3143 columns = None 3144 3145 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3146 partition_by = self._parse_partition_by() 3147 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3148 tablespace = ( 3149 self._parse_var(any_token=True) 3150 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3151 else None 3152 ) 3153 where = self._parse_where() 3154 3155 return self.expression( 3156 exp.IndexParameters, 3157 using=using, 3158 columns=columns, 3159 include=include, 3160 partition_by=partition_by, 3161 where=where, 3162 with_storage=with_storage, 3163 tablespace=tablespace, 3164 ) 3165 3166 def _parse_index( 3167 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3168 ) -> 
t.Optional[exp.Index]: 3169 if index or anonymous: 3170 unique = None 3171 primary = None 3172 amp = None 3173 3174 self._match(TokenType.ON) 3175 self._match(TokenType.TABLE) # hive 3176 table = self._parse_table_parts(schema=True) 3177 else: 3178 unique = self._match(TokenType.UNIQUE) 3179 primary = self._match_text_seq("PRIMARY") 3180 amp = self._match_text_seq("AMP") 3181 3182 if not self._match(TokenType.INDEX): 3183 return None 3184 3185 index = self._parse_id_var() 3186 table = None 3187 3188 params = self._parse_index_params() 3189 3190 return self.expression( 3191 exp.Index, 3192 this=index, 3193 table=table, 3194 unique=unique, 3195 primary=primary, 3196 amp=amp, 3197 params=params, 3198 ) 3199 3200 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3201 hints: t.List[exp.Expression] = [] 3202 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3203 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3204 hints.append( 3205 self.expression( 3206 exp.WithTableHint, 3207 expressions=self._parse_csv( 3208 lambda: self._parse_function() or self._parse_var(any_token=True) 3209 ), 3210 ) 3211 ) 3212 self._match_r_paren() 3213 else: 3214 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3215 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3216 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3217 3218 self._match_texts(("INDEX", "KEY")) 3219 if self._match(TokenType.FOR): 3220 hint.set("target", self._advance_any() and self._prev.text.upper()) 3221 3222 hint.set("expressions", self._parse_wrapped_id_vars()) 3223 hints.append(hint) 3224 3225 return hints or None 3226 3227 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3228 return ( 3229 (not schema and self._parse_function(optional_parens=False)) 3230 or self._parse_id_var(any_token=False) 3231 or self._parse_string_as_identifier() 3232 or self._parse_placeholder() 3233 ) 3234 3235 def _parse_table_parts( 3236 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3237 ) -> exp.Table: 3238 catalog = None 3239 db = None 3240 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3241 3242 while self._match(TokenType.DOT): 3243 if catalog: 3244 # This allows nesting the table in arbitrarily many dot expressions if needed 3245 table = self.expression( 3246 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3247 ) 3248 else: 3249 catalog = db 3250 db = table 3251 # "" used for tsql FROM a..b case 3252 table = self._parse_table_part(schema=schema) or "" 3253 3254 if ( 3255 wildcard 3256 and self._is_connected() 3257 and (isinstance(table, exp.Identifier) or not table) 3258 and self._match(TokenType.STAR) 3259 ): 3260 if isinstance(table, exp.Identifier): 3261 table.args["this"] += "*" 3262 else: 3263 table = exp.Identifier(this="*") 3264 3265 # We bubble up comments from the Identifier to the Table 3266 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3267 3268 if is_db_reference: 3269 catalog = db 3270 db = table 3271 table = None 3272 3273 if not table and not is_db_reference: 3274 self.raise_error(f"Expected table name but got {self._curr}") 3275 if not db and is_db_reference: 3276 self.raise_error(f"Expected database name but got {self._curr}") 3277 3278 return self.expression( 3279 exp.Table, 3280 comments=comments, 3281 this=table, 3282 db=db, 3283 catalog=catalog, 3284 pivots=self._parse_pivots(), 3285 ) 3286 3287 
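# [Editor's note: illustrative addition, not part of the original source.]
# _parse_table_parts above maps dotted names onto Table args, e.g. c.d.t becomes
# Table(catalog=c, db=d, this=t), while T-SQL's a..b leaves an empty db part.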
def _parse_table( 3288 self, 3289 schema: bool = False, 3290 joins: bool = False, 3291 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3292 parse_bracket: bool = False, 3293 is_db_reference: bool = False, 3294 parse_partition: bool = False, 3295 ) -> t.Optional[exp.Expression]: 3296 lateral = self._parse_lateral() 3297 if lateral: 3298 return lateral 3299 3300 unnest = self._parse_unnest() 3301 if unnest: 3302 return unnest 3303 3304 values = self._parse_derived_table_values() 3305 if values: 3306 return values 3307 3308 subquery = self._parse_select(table=True) 3309 if subquery: 3310 if not subquery.args.get("pivots"): 3311 subquery.set("pivots", self._parse_pivots()) 3312 return subquery 3313 3314 bracket = parse_bracket and self._parse_bracket(None) 3315 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3316 3317 only = self._match(TokenType.ONLY) 3318 3319 this = t.cast( 3320 exp.Expression, 3321 bracket 3322 or self._parse_bracket( 3323 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3324 ), 3325 ) 3326 3327 if only: 3328 this.set("only", only) 3329 3330 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3331 self._match_text_seq("*") 3332 3333 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3334 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3335 this.set("partition", self._parse_partition()) 3336 3337 if schema: 3338 return self._parse_schema(this=this) 3339 3340 version = self._parse_version() 3341 3342 if version: 3343 this.set("version", version) 3344 3345 if self.dialect.ALIAS_POST_TABLESAMPLE: 3346 table_sample = self._parse_table_sample() 3347 3348 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3349 if alias: 3350 this.set("alias", alias) 3351 3352 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3353 return self.expression( 3354 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3355 ) 3356 3357 this.set("hints", self._parse_table_hints()) 3358 3359 if not this.args.get("pivots"): 3360 this.set("pivots", self._parse_pivots()) 3361 3362 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3363 table_sample = self._parse_table_sample() 3364 3365 if table_sample: 3366 table_sample.set("this", this) 3367 this = table_sample 3368 3369 if joins: 3370 for join in self._parse_joins(): 3371 this.append("joins", join) 3372 3373 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3374 this.set("ordinality", True) 3375 this.set("alias", self._parse_table_alias()) 3376 3377 return this 3378 3379 def _parse_version(self) -> t.Optional[exp.Version]: 3380 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3381 this = "TIMESTAMP" 3382 elif self._match(TokenType.VERSION_SNAPSHOT): 3383 this = "VERSION" 3384 else: 3385 return None 3386 3387 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3388 kind = self._prev.text.upper() 3389 start = self._parse_bitwise() 3390 self._match_texts(("TO", "AND")) 3391 end = self._parse_bitwise() 3392 expression: t.Optional[exp.Expression] = self.expression( 3393 exp.Tuple, expressions=[start, end] 3394 ) 3395 elif self._match_text_seq("CONTAINED", "IN"): 3396 kind = "CONTAINED IN" 3397 expression = self.expression( 3398 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3399 ) 3400 elif self._match(TokenType.ALL): 3401 kind = "ALL" 3402 expression = None 3403 else: 3404 self._match_text_seq("AS", "OF") 3405 kind = "AS OF" 3406 
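# [Editor's note: illustrative addition, not part of the original source.] This
# fallthrough covers Delta Lake-style time travel, e.g. t TIMESTAMP AS OF
# '2024-01-01' or t VERSION AS OF 1, producing kind="AS OF".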
expression = self._parse_type() 3407 3408 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3409 3410 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3411 if not self._match(TokenType.UNNEST): 3412 return None 3413 3414 expressions = self._parse_wrapped_csv(self._parse_equality) 3415 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3416 3417 alias = self._parse_table_alias() if with_alias else None 3418 3419 if alias: 3420 if self.dialect.UNNEST_COLUMN_ONLY: 3421 if alias.args.get("columns"): 3422 self.raise_error("Unexpected extra column alias in unnest.") 3423 3424 alias.set("columns", [alias.this]) 3425 alias.set("this", None) 3426 3427 columns = alias.args.get("columns") or [] 3428 if offset and len(expressions) < len(columns): 3429 offset = columns.pop() 3430 3431 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3432 self._match(TokenType.ALIAS) 3433 offset = self._parse_id_var( 3434 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3435 ) or exp.to_identifier("offset") 3436 3437 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3438 3439 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3440 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3441 if not is_derived and not self._match_text_seq("VALUES"): 3442 return None 3443 3444 expressions = self._parse_csv(self._parse_value) 3445 alias = self._parse_table_alias() 3446 3447 if is_derived: 3448 self._match_r_paren() 3449 3450 return self.expression( 3451 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3452 ) 3453 3454 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3455 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3456 as_modifier and self._match_text_seq("USING", "SAMPLE") 3457 ): 3458 return None 3459 3460 bucket_numerator = None 3461 bucket_denominator = None 3462 bucket_field = None 3463 percent = None 3464 size = None 3465 seed = None 3466 3467 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3468 matched_l_paren = self._match(TokenType.L_PAREN) 3469 3470 if self.TABLESAMPLE_CSV: 3471 num = None 3472 expressions = self._parse_csv(self._parse_primary) 3473 else: 3474 expressions = None 3475 num = ( 3476 self._parse_factor() 3477 if self._match(TokenType.NUMBER, advance=False) 3478 else self._parse_primary() or self._parse_placeholder() 3479 ) 3480 3481 if self._match_text_seq("BUCKET"): 3482 bucket_numerator = self._parse_number() 3483 self._match_text_seq("OUT", "OF") 3484 bucket_denominator = self._parse_number() 3485 self._match(TokenType.ON) 3486 bucket_field = self._parse_field() 3487 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3488 percent = num 3489 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3490 size = num 3491 else: 3492 percent = num 3493 3494 if matched_l_paren: 3495 self._match_r_paren() 3496 3497 if self._match(TokenType.L_PAREN): 3498 method = self._parse_var(upper=True) 3499 seed = self._match(TokenType.COMMA) and self._parse_number() 3500 self._match_r_paren() 3501 elif self._match_texts(("SEED", "REPEATABLE")): 3502 seed = self._parse_wrapped(self._parse_number) 3503 3504 if not method and self.DEFAULT_SAMPLING_METHOD: 3505 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3506 3507 return self.expression( 3508 exp.TableSample, 3509 expressions=expressions, 3510 method=method, 3511
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]
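    # Illustrative note (not part of the original source): a Snowflake-style PIVOT is
    # expected to parse into an exp.Pivot attached to its table, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sql = "SELECT * FROM t PIVOT (SUM(x) FOR y IN ('a', 'b'))"
    #   assert sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot) is not None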
    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_conjunction()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
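    # Illustrative note (not part of the original source): while the CONNECT BY condition
    # is parsed, PRIOR is temporarily registered as a no-paren function parser so that
    # `PRIOR col` becomes exp.Prior, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sql = "SELECT id FROM t START WITH id = 1 CONNECT BY PRIOR id = parent_id"
    #   ast = sqlglot.parse_one(sql, read="oracle")
    #   assert ast.find(exp.Connect) is not None and ast.find(exp.Prior) is not None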
    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
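    # Illustrative note (not part of the original source): when a query does not spell out
    # NULLS FIRST/LAST, `nulls_first` is inferred from the dialect's NULL_ORDERING, which
    # is what lets transpilation make the ordering explicit across dialects, e.g.:
    #
    #   import sqlglot
    #
    #   # MySQL treats NULLs as small, so the DuckDB output may spell out NULLS FIRST.
    #   sqlglot.transpile("SELECT x FROM t ORDER BY x", read="mysql", write="duckdb")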
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)

                    if expr:
                        this.set(arg, expr.pop())

        return this
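    # Illustrative note (not part of the original source): set operations default to
    # DISTINCT when neither DISTINCT nor ALL is written, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   union = sqlglot.parse_one("SELECT a FROM x UNION SELECT b FROM y")
    #   assert isinstance(union, exp.Union) and union.args.get("distinct")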
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        this = self._parse_equality()

        if self._match(TokenType.COLON_EQ):
            this = self.expression(
                exp.PropertyEQ,
                this=this,
                comments=self._prev_comments,
                expression=self._parse_conjunction(),
            )

        while self._match_set(self.CONJUNCTION):
            this = self.expression(
                self.CONJUNCTION[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_equality(),
            )
        return this

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
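    # Illustrative note (not part of the original source): the Postgres ISNULL / NOTNULL
    # shorthands reduce to the same exp.Is (optionally negated) nodes as IS [NOT] NULL,
    # e.g.:
    #
    #   import sqlglot
    #
    #   sqlglot.transpile("SELECT x NOTNULL", read="postgres")
    #   # expected to yield something like ["SELECT NOT x IS NULL"]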
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
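    # Illustrative note (not part of the original source): canonicalization means that
    # INTERVAL '5 day' and INTERVAL '5' day should produce the same tree, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   interval = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
    #   assert interval.this.name == "5" and interval.unit and interval.unit.name == "DAY"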
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            if data_type.expressions:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTER.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
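    # Illustrative note (not part of the original source): `_parse_types` recurses through
    # itself for nested types, so parameterized and nested forms compose, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   tree = sqlglot.parse_one("CREATE TABLE t (c ARRAY<STRUCT<a INT>>)", read="bigquery")
    #   assert tree.find(exp.DataType).is_type("array")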
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = (
            self._parse_type(parse_interval=False, fallback_to_identifier=True)
            or self._parse_id_var()
        )
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index
            path = self._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
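    # Illustrative note (not part of the original source): for dialects where
    # COLON_IS_JSON_EXTRACT is set, a chain like `col:a.b::int` is collapsed into one
    # exp.JSONExtract with the :: casts re-applied on top, e.g. (assuming Databricks
    # behaves this way here):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("SELECT payload:user.id::int FROM t", read="databricks")
    #   assert ast.find(exp.Cast) is not None and ast.find(exp.JSONExtract) is not None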
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
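    # Illustrative note (not part of the original source): lambda parsing is speculative;
    # the arguments are consumed first and rolled back via `_retreat` if no arrow token
    # from LAMBDAS follows, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sql = "SELECT TRANSFORM(xs, x -> x + 1) FROM t"
    #   assert sqlglot.parse_one(sql, read="spark").find(exp.Lambda) is not None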
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
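    # Illustrative note (not part of the original source): both named and unnamed
    # constraints funnel through CONSTRAINT_PARSERS, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sql = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #   ast = sqlglot.parse_one(sql)
    #   assert ast.find(exp.GeneratedAsIdentityColumnConstraint) is not None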
self.raise_error(f"No parser found for schema constraint {constraint}.") 4990 4991 return self.CONSTRAINT_PARSERS[constraint](self) 4992 4993 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4994 self._match_text_seq("KEY") 4995 return self.expression( 4996 exp.UniqueColumnConstraint, 4997 this=self._parse_schema(self._parse_id_var(any_token=False)), 4998 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4999 on_conflict=self._parse_on_conflict(), 5000 ) 5001 5002 def _parse_key_constraint_options(self) -> t.List[str]: 5003 options = [] 5004 while True: 5005 if not self._curr: 5006 break 5007 5008 if self._match(TokenType.ON): 5009 action = None 5010 on = self._advance_any() and self._prev.text 5011 5012 if self._match_text_seq("NO", "ACTION"): 5013 action = "NO ACTION" 5014 elif self._match_text_seq("CASCADE"): 5015 action = "CASCADE" 5016 elif self._match_text_seq("RESTRICT"): 5017 action = "RESTRICT" 5018 elif self._match_pair(TokenType.SET, TokenType.NULL): 5019 action = "SET NULL" 5020 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5021 action = "SET DEFAULT" 5022 else: 5023 self.raise_error("Invalid key constraint") 5024 5025 options.append(f"ON {on} {action}") 5026 elif self._match_text_seq("NOT", "ENFORCED"): 5027 options.append("NOT ENFORCED") 5028 elif self._match_text_seq("DEFERRABLE"): 5029 options.append("DEFERRABLE") 5030 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5031 options.append("INITIALLY DEFERRED") 5032 elif self._match_text_seq("NORELY"): 5033 options.append("NORELY") 5034 elif self._match_text_seq("MATCH", "FULL"): 5035 options.append("MATCH FULL") 5036 else: 5037 break 5038 5039 return options 5040 5041 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5042 if match and not self._match(TokenType.REFERENCES): 5043 return None 5044 5045 expressions = None 5046 this = self._parse_table(schema=True) 5047 options = self._parse_key_constraint_options() 5048 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5049 5050 def _parse_foreign_key(self) -> exp.ForeignKey: 5051 expressions = self._parse_wrapped_id_vars() 5052 reference = self._parse_references() 5053 options = {} 5054 5055 while self._match(TokenType.ON): 5056 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5057 self.raise_error("Expected DELETE or UPDATE") 5058 5059 kind = self._prev.text.lower() 5060 5061 if self._match_text_seq("NO", "ACTION"): 5062 action = "NO ACTION" 5063 elif self._match(TokenType.SET): 5064 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5065 action = "SET " + self._prev.text.upper() 5066 else: 5067 self._advance() 5068 action = self._prev.text.upper() 5069 5070 options[kind] = action 5071 5072 return self.expression( 5073 exp.ForeignKey, 5074 expressions=expressions, 5075 reference=reference, 5076 **options, # type: ignore 5077 ) 5078 5079 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5080 return self._parse_field() 5081 5082 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5083 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5084 self._retreat(self._index - 1) 5085 return None 5086 5087 id_vars = self._parse_wrapped_id_vars() 5088 return self.expression( 5089 exp.PeriodForSystemTimeConstraint, 5090 this=seq_get(id_vars, 0), 5091 expression=seq_get(id_vars, 1), 5092 ) 5093 5094 def _parse_primary_key( 5095 self, wrapped_optional: bool = False, in_props: bool = False 5096 ) -> 
    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
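    # Illustrative note (not part of the original source): parsing WITHIN GROUP "manually"
    # keeps STRING_AGG aligned with the GROUP_CONCAT-style dialects, e.g.:
    #
    #   import sqlglot
    #
    #   sql = "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t"
    #   sqlglot.transpile(sql, read="postgres", write="mysql")
    #   # expected to yield something like
    #   # ["SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t"]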
]] [LIMIT n]) 5283 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5284 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5285 5286 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5287 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5288 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5289 if not self._match_text_seq("WITHIN", "GROUP"): 5290 self._retreat(index) 5291 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5292 5293 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5294 order = self._parse_order(this=seq_get(args, 0)) 5295 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5296 5297 def _parse_convert( 5298 self, strict: bool, safe: t.Optional[bool] = None 5299 ) -> t.Optional[exp.Expression]: 5300 this = self._parse_bitwise() 5301 5302 if self._match(TokenType.USING): 5303 to: t.Optional[exp.Expression] = self.expression( 5304 exp.CharacterSet, this=self._parse_var() 5305 ) 5306 elif self._match(TokenType.COMMA): 5307 to = self._parse_types() 5308 else: 5309 to = None 5310 5311 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5312 5313 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5314 """ 5315 There are generally two variants of the DECODE function: 5316 5317 - DECODE(bin, charset) 5318 - DECODE(expression, search, result [, search, result] ... [, default]) 5319 5320 The second variant will always be parsed into a CASE expression. Note that NULL 5321 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5322 instead of relying on pattern matching. 
5323 """ 5324 args = self._parse_csv(self._parse_conjunction) 5325 5326 if len(args) < 3: 5327 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5328 5329 expression, *expressions = args 5330 if not expression: 5331 return None 5332 5333 ifs = [] 5334 for search, result in zip(expressions[::2], expressions[1::2]): 5335 if not search or not result: 5336 return None 5337 5338 if isinstance(search, exp.Literal): 5339 ifs.append( 5340 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5341 ) 5342 elif isinstance(search, exp.Null): 5343 ifs.append( 5344 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5345 ) 5346 else: 5347 cond = exp.or_( 5348 exp.EQ(this=expression.copy(), expression=search), 5349 exp.and_( 5350 exp.Is(this=expression.copy(), expression=exp.Null()), 5351 exp.Is(this=search.copy(), expression=exp.Null()), 5352 copy=False, 5353 ), 5354 copy=False, 5355 ) 5356 ifs.append(exp.If(this=cond, true=result)) 5357 5358 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5359 5360 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5361 self._match_text_seq("KEY") 5362 key = self._parse_column() 5363 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5364 self._match_text_seq("VALUE") 5365 value = self._parse_bitwise() 5366 5367 if not key and not value: 5368 return None 5369 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5370 5371 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5372 if not this or not self._match_text_seq("FORMAT", "JSON"): 5373 return this 5374 5375 return self.expression(exp.FormatJson, this=this) 5376 5377 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5378 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5379 for value in values: 5380 if self._match_text_seq(value, "ON", on): 5381 return f"{value} ON {on}" 5382 5383 return None 5384 5385 @t.overload 5386 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5387 5388 @t.overload 5389 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5390 5391 def _parse_json_object(self, agg=False): 5392 star = self._parse_star() 5393 expressions = ( 5394 [star] 5395 if star 5396 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5397 ) 5398 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5399 5400 unique_keys = None 5401 if self._match_text_seq("WITH", "UNIQUE"): 5402 unique_keys = True 5403 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5404 unique_keys = False 5405 5406 self._match_text_seq("KEYS") 5407 5408 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5409 self._parse_type() 5410 ) 5411 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5412 5413 return self.expression( 5414 exp.JSONObjectAgg if agg else exp.JSONObject, 5415 expressions=expressions, 5416 null_handling=null_handling, 5417 unique_keys=unique_keys, 5418 return_type=return_type, 5419 encoding=encoding, 5420 ) 5421 5422 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5423 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5424 if not self._match_text_seq("NESTED"): 5425 this = self._parse_id_var() 5426 kind = self._parse_types(allow_identifiers=False) 5427 nested = None 5428 else: 5429 this = None 5430 kind = None 5431 nested = True 5432 5433 path = self._match_text_seq("PATH") and self._parse_string() 5434 nested_schema = nested and self._parse_json_schema() 5435 5436 return self.expression( 5437 exp.JSONColumnDef, 5438 this=this, 5439 kind=kind, 5440 path=path, 5441 nested_schema=nested_schema, 5442 ) 5443 5444 def _parse_json_schema(self) -> exp.JSONSchema: 5445 self._match_text_seq("COLUMNS") 5446 return self.expression( 5447 exp.JSONSchema, 5448 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5449 ) 5450 5451 def _parse_json_table(self) -> exp.JSONTable: 5452 this = self._parse_format_json(self._parse_bitwise()) 5453 path = self._match(TokenType.COMMA) and self._parse_string() 5454 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5455 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5456 schema = self._parse_json_schema() 5457 5458 return exp.JSONTable( 5459 this=this, 5460 schema=schema, 5461 path=path, 5462 error_handling=error_handling, 5463 empty_handling=empty_handling, 5464 ) 5465 5466 def _parse_match_against(self) -> exp.MatchAgainst: 5467 expressions = self._parse_csv(self._parse_column) 5468 5469 self._match_text_seq(")", "AGAINST", "(") 5470 5471 this = self._parse_string() 5472 5473 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5474 modifier = "IN NATURAL LANGUAGE MODE" 5475 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5476 modifier = f"{modifier} WITH QUERY EXPANSION" 5477 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5478 modifier = "IN BOOLEAN MODE" 5479 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5480 modifier = "WITH QUERY EXPANSION" 5481 else: 5482 modifier = None 5483 5484 return self.expression( 5485 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5486 ) 5487 5488 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5489 def _parse_open_json(self) -> exp.OpenJSON: 5490 this = self._parse_bitwise() 5491 path = self._match(TokenType.COMMA) and self._parse_string() 5492 5493 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5494 this = self._parse_field(any_token=True) 5495 kind = self._parse_types() 5496 path = 
self._parse_string() 5497 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5498 5499 return self.expression( 5500 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5501 ) 5502 5503 expressions = None 5504 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5505 self._match_l_paren() 5506 expressions = self._parse_csv(_parse_open_json_column_def) 5507 5508 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5509 5510 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5511 args = self._parse_csv(self._parse_bitwise) 5512 5513 if self._match(TokenType.IN): 5514 return self.expression( 5515 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5516 ) 5517 5518 if haystack_first: 5519 haystack = seq_get(args, 0) 5520 needle = seq_get(args, 1) 5521 else: 5522 needle = seq_get(args, 0) 5523 haystack = seq_get(args, 1) 5524 5525 return self.expression( 5526 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5527 ) 5528 5529 def _parse_predict(self) -> exp.Predict: 5530 self._match_text_seq("MODEL") 5531 this = self._parse_table() 5532 5533 self._match(TokenType.COMMA) 5534 self._match_text_seq("TABLE") 5535 5536 return self.expression( 5537 exp.Predict, 5538 this=this, 5539 expression=self._parse_table(), 5540 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5541 ) 5542 5543 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5544 args = self._parse_csv(self._parse_table) 5545 return exp.JoinHint(this=func_name.upper(), expressions=args) 5546 5547 def _parse_substring(self) -> exp.Substring: 5548 # Postgres supports the form: substring(string [from int] [for int]) 5549 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5550 5551 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5552 5553 if self._match(TokenType.FROM): 5554 args.append(self._parse_bitwise()) 5555 if self._match(TokenType.FOR): 5556 if len(args) == 1: 5557 args.append(exp.Literal.number(1)) 5558 args.append(self._parse_bitwise()) 5559 5560 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5561 5562 def _parse_trim(self) -> exp.Trim: 5563 # https://www.w3resource.com/sql/character-functions/trim.php 5564 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5565 5566 position = None 5567 collation = None 5568 expression = None 5569 5570 if self._match_texts(self.TRIM_TYPES): 5571 position = self._prev.text.upper() 5572 5573 this = self._parse_bitwise() 5574 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5575 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5576 expression = self._parse_bitwise() 5577 5578 if invert_order: 5579 this, expression = expression, this 5580 5581 if self._match(TokenType.COLLATE): 5582 collation = self._parse_bitwise() 5583 5584 return self.expression( 5585 exp.Trim, this=this, position=position, expression=expression, collation=collation 5586 ) 5587 5588 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5589 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5590 5591 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5592 return self._parse_window(self._parse_id_var(), alias=True) 5593 5594 def _parse_respect_or_ignore_nulls( 5595 self, this: t.Optional[exp.Expression] 5596 ) -> t.Optional[exp.Expression]: 5597 if self._match_text_seq("IGNORE", "NULLS"): 
5598 return self.expression(exp.IgnoreNulls, this=this) 5599 if self._match_text_seq("RESPECT", "NULLS"): 5600 return self.expression(exp.RespectNulls, this=this) 5601 return this 5602 5603 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5604 if self._match(TokenType.HAVING): 5605 self._match_texts(("MAX", "MIN")) 5606 max = self._prev.text.upper() != "MIN" 5607 return self.expression( 5608 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5609 ) 5610 5611 return this 5612 5613 def _parse_window( 5614 self, this: t.Optional[exp.Expression], alias: bool = False 5615 ) -> t.Optional[exp.Expression]: 5616 func = this 5617 comments = func.comments if isinstance(func, exp.Expression) else None 5618 5619 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5620 self._match(TokenType.WHERE) 5621 this = self.expression( 5622 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5623 ) 5624 self._match_r_paren() 5625 5626 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5627 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5628 if self._match_text_seq("WITHIN", "GROUP"): 5629 order = self._parse_wrapped(self._parse_order) 5630 this = self.expression(exp.WithinGroup, this=this, expression=order) 5631 5632 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5633 # Some dialects choose to implement and some do not. 5634 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5635 5636 # There is some code above in _parse_lambda that handles 5637 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5638 5639 # The below changes handle 5640 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5641 5642 # Oracle allows both formats 5643 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5644 # and Snowflake chose to do the same for familiarity 5645 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5646 if isinstance(this, exp.AggFunc): 5647 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5648 5649 if ignore_respect and ignore_respect is not this: 5650 ignore_respect.replace(ignore_respect.this) 5651 this = self.expression(ignore_respect.__class__, this=this) 5652 5653 this = self._parse_respect_or_ignore_nulls(this) 5654 5655 # bigquery select from window x AS (partition by ...) 
5656 if alias: 5657 over = None 5658 self._match(TokenType.ALIAS) 5659 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5660 return this 5661 else: 5662 over = self._prev.text.upper() 5663 5664 if comments and isinstance(func, exp.Expression): 5665 func.pop_comments() 5666 5667 if not self._match(TokenType.L_PAREN): 5668 return self.expression( 5669 exp.Window, 5670 comments=comments, 5671 this=this, 5672 alias=self._parse_id_var(False), 5673 over=over, 5674 ) 5675 5676 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5677 5678 first = self._match(TokenType.FIRST) 5679 if self._match_text_seq("LAST"): 5680 first = False 5681 5682 partition, order = self._parse_partition_and_order() 5683 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5684 5685 if kind: 5686 self._match(TokenType.BETWEEN) 5687 start = self._parse_window_spec() 5688 self._match(TokenType.AND) 5689 end = self._parse_window_spec() 5690 5691 spec = self.expression( 5692 exp.WindowSpec, 5693 kind=kind, 5694 start=start["value"], 5695 start_side=start["side"], 5696 end=end["value"], 5697 end_side=end["side"], 5698 ) 5699 else: 5700 spec = None 5701 5702 self._match_r_paren() 5703 5704 window = self.expression( 5705 exp.Window, 5706 comments=comments, 5707 this=this, 5708 partition_by=partition, 5709 order=order, 5710 spec=spec, 5711 alias=window_alias, 5712 over=over, 5713 first=first, 5714 ) 5715 5716 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5717 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5718 return self._parse_window(window, alias=alias) 5719 5720 return window 5721 5722 def _parse_partition_and_order( 5723 self, 5724 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5725 return self._parse_partition_by(), self._parse_order() 5726 5727 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5728 self._match(TokenType.BETWEEN) 5729 5730 return { 5731 "value": ( 5732 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5733 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5734 or self._parse_bitwise() 5735 ), 5736 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5737 } 5738 5739 def _parse_alias( 5740 self, this: t.Optional[exp.Expression], explicit: bool = False 5741 ) -> t.Optional[exp.Expression]: 5742 any_token = self._match(TokenType.ALIAS) 5743 comments = self._prev_comments or [] 5744 5745 if explicit and not any_token: 5746 return this 5747 5748 if self._match(TokenType.L_PAREN): 5749 aliases = self.expression( 5750 exp.Aliases, 5751 comments=comments, 5752 this=this, 5753 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5754 ) 5755 self._match_r_paren(aliases) 5756 return aliases 5757 5758 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5759 self.STRING_ALIASES and self._parse_string_as_identifier() 5760 ) 5761 5762 if alias: 5763 comments.extend(alias.pop_comments()) 5764 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5765 column = this.this 5766 5767 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5768 if not this.comments and column and column.comments: 5769 this.comments = column.pop_comments() 5770 5771 return this 5772 5773 def _parse_id_var( 5774 self, 5775 any_token: bool = True, 5776 tokens: t.Optional[t.Collection[TokenType]] = None, 5777 ) -> t.Optional[exp.Expression]: 5778 expression = self._parse_identifier() 5779 if 
not expression and ( 5780 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5781 ): 5782 quoted = self._prev.token_type == TokenType.STRING 5783 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5784 5785 return expression 5786 5787 def _parse_string(self) -> t.Optional[exp.Expression]: 5788 if self._match_set(self.STRING_PARSERS): 5789 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5790 return self._parse_placeholder() 5791 5792 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5793 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5794 5795 def _parse_number(self) -> t.Optional[exp.Expression]: 5796 if self._match_set(self.NUMERIC_PARSERS): 5797 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5798 return self._parse_placeholder() 5799 5800 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5801 if self._match(TokenType.IDENTIFIER): 5802 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5803 return self._parse_placeholder() 5804 5805 def _parse_var( 5806 self, 5807 any_token: bool = False, 5808 tokens: t.Optional[t.Collection[TokenType]] = None, 5809 upper: bool = False, 5810 ) -> t.Optional[exp.Expression]: 5811 if ( 5812 (any_token and self._advance_any()) 5813 or self._match(TokenType.VAR) 5814 or (self._match_set(tokens) if tokens else False) 5815 ): 5816 return self.expression( 5817 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5818 ) 5819 return self._parse_placeholder() 5820 5821 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5822 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5823 self._advance() 5824 return self._prev 5825 return None 5826 5827 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5828 return self._parse_var() or self._parse_string() 5829 5830 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5831 return self._parse_primary() or self._parse_var(any_token=True) 5832 5833 def _parse_null(self) -> t.Optional[exp.Expression]: 5834 if self._match_set(self.NULL_TOKENS): 5835 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5836 return self._parse_placeholder() 5837 5838 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5839 if self._match(TokenType.TRUE): 5840 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5841 if self._match(TokenType.FALSE): 5842 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5843 return self._parse_placeholder() 5844 5845 def _parse_star(self) -> t.Optional[exp.Expression]: 5846 if self._match(TokenType.STAR): 5847 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5848 return self._parse_placeholder() 5849 5850 def _parse_parameter(self) -> exp.Parameter: 5851 this = self._parse_identifier() or self._parse_primary_or_var() 5852 return self.expression(exp.Parameter, this=this) 5853 5854 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5855 if self._match_set(self.PLACEHOLDER_PARSERS): 5856 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5857 if placeholder: 5858 return placeholder 5859 self._advance(-1) 5860 return None 5861 5862 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5863 if not self._match_texts(keywords): 5864 return None 5865 if self._match(TokenType.L_PAREN, advance=False): 5866 return 
self._parse_wrapped_csv(self._parse_expression) 5867 5868 expression = self._parse_expression() 5869 return [expression] if expression else None 5870 5871 def _parse_csv( 5872 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5873 ) -> t.List[exp.Expression]: 5874 parse_result = parse_method() 5875 items = [parse_result] if parse_result is not None else [] 5876 5877 while self._match(sep): 5878 self._add_comments(parse_result) 5879 parse_result = parse_method() 5880 if parse_result is not None: 5881 items.append(parse_result) 5882 5883 return items 5884 5885 def _parse_tokens( 5886 self, parse_method: t.Callable, expressions: t.Dict 5887 ) -> t.Optional[exp.Expression]: 5888 this = parse_method() 5889 5890 while self._match_set(expressions): 5891 this = self.expression( 5892 expressions[self._prev.token_type], 5893 this=this, 5894 comments=self._prev_comments, 5895 expression=parse_method(), 5896 ) 5897 5898 return this 5899 5900 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5901 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5902 5903 def _parse_wrapped_csv( 5904 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5905 ) -> t.List[exp.Expression]: 5906 return self._parse_wrapped( 5907 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5908 ) 5909 5910 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5911 wrapped = self._match(TokenType.L_PAREN) 5912 if not wrapped and not optional: 5913 self.raise_error("Expecting (") 5914 parse_result = parse_method() 5915 if wrapped: 5916 self._match_r_paren() 5917 return parse_result 5918 5919 def _parse_expressions(self) -> t.List[exp.Expression]: 5920 return self._parse_csv(self._parse_expression) 5921 5922 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5923 return self._parse_select() or self._parse_set_operations( 5924 self._parse_expression() if alias else self._parse_conjunction() 5925 ) 5926 5927 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5928 return self._parse_query_modifiers( 5929 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5930 ) 5931 5932 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5933 this = None 5934 if self._match_texts(self.TRANSACTION_KIND): 5935 this = self._prev.text 5936 5937 self._match_texts(("TRANSACTION", "WORK")) 5938 5939 modes = [] 5940 while True: 5941 mode = [] 5942 while self._match(TokenType.VAR): 5943 mode.append(self._prev.text) 5944 5945 if mode: 5946 modes.append(" ".join(mode)) 5947 if not self._match(TokenType.COMMA): 5948 break 5949 5950 return self.expression(exp.Transaction, this=this, modes=modes) 5951 5952 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5953 chain = None 5954 savepoint = None 5955 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5956 5957 self._match_texts(("TRANSACTION", "WORK")) 5958 5959 if self._match_text_seq("TO"): 5960 self._match_text_seq("SAVEPOINT") 5961 savepoint = self._parse_id_var() 5962 5963 if self._match(TokenType.AND): 5964 chain = not self._match_text_seq("NO") 5965 self._match_text_seq("CHAIN") 5966 5967 if is_rollback: 5968 return self.expression(exp.Rollback, savepoint=savepoint) 5969 5970 return self.expression(exp.Commit, chain=chain) 5971 5972 def _parse_refresh(self) -> exp.Refresh: 5973 self._match(TokenType.TABLE) 5974 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5975 5976 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5977 if not self._match_text_seq("ADD"): 5978 return None 5979 5980 self._match(TokenType.COLUMN) 5981 exists_column = self._parse_exists(not_=True) 5982 expression = self._parse_field_def() 5983 5984 if expression: 5985 expression.set("exists", exists_column) 5986 5987 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5988 if self._match_texts(("FIRST", "AFTER")): 5989 position = self._prev.text 5990 column_position = self.expression( 5991 exp.ColumnPosition, this=self._parse_column(), position=position 5992 ) 5993 expression.set("position", column_position) 5994 5995 return expression 5996 5997 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5998 drop = self._match(TokenType.DROP) and self._parse_drop() 5999 if drop and not isinstance(drop, exp.Command): 6000 drop.set("kind", drop.args.get("kind", "COLUMN")) 6001 return drop 6002 6003 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6004 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6005 return self.expression( 6006 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6007 ) 6008 6009 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6010 index = self._index - 1 6011 6012 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6013 return self._parse_csv( 6014 lambda: self.expression( 6015 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6016 ) 6017 ) 6018 6019 self._retreat(index) 6020 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6021 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6022 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6023 6024 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6025 if self._match_texts(self.ALTER_ALTER_PARSERS): 6026 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6027 6028 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6029 # keyword after ALTER we default to parsing this statement 6030 self._match(TokenType.COLUMN) 6031 column = self._parse_field(any_token=True) 6032 6033 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6034 return self.expression(exp.AlterColumn, this=column, drop=True) 6035 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6036 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6037 if self._match(TokenType.COMMENT): 6038 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6039 6040 self._match_text_seq("SET", "DATA") 6041 self._match_text_seq("TYPE") 6042 return self.expression( 6043 exp.AlterColumn, 6044 this=column, 6045 dtype=self._parse_types(), 6046 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6047 using=self._match(TokenType.USING) and self._parse_conjunction(), 6048 ) 6049 6050 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6051 if self._match_texts(("ALL", "EVEN", "AUTO")): 6052 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6053 6054 self._match_text_seq("KEY", "DISTKEY") 6055 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6056 6057 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6058 if compound: 6059 
self._match_text_seq("SORTKEY") 6060 6061 if self._match(TokenType.L_PAREN, advance=False): 6062 return self.expression( 6063 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6064 ) 6065 6066 self._match_texts(("AUTO", "NONE")) 6067 return self.expression( 6068 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6069 ) 6070 6071 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6072 index = self._index - 1 6073 6074 partition_exists = self._parse_exists() 6075 if self._match(TokenType.PARTITION, advance=False): 6076 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6077 6078 self._retreat(index) 6079 return self._parse_csv(self._parse_drop_column) 6080 6081 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6082 if self._match(TokenType.COLUMN): 6083 exists = self._parse_exists() 6084 old_column = self._parse_column() 6085 to = self._match_text_seq("TO") 6086 new_column = self._parse_column() 6087 6088 if old_column is None or to is None or new_column is None: 6089 return None 6090 6091 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6092 6093 self._match_text_seq("TO") 6094 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6095 6096 def _parse_alter_table_set(self) -> exp.AlterSet: 6097 alter_set = self.expression(exp.AlterSet) 6098 6099 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6100 "TABLE", "PROPERTIES" 6101 ): 6102 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6103 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6104 alter_set.set("expressions", [self._parse_conjunction()]) 6105 elif self._match_texts(("LOGGED", "UNLOGGED")): 6106 alter_set.set("option", exp.var(self._prev.text.upper())) 6107 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6108 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6109 elif self._match_text_seq("LOCATION"): 6110 alter_set.set("location", self._parse_field()) 6111 elif self._match_text_seq("ACCESS", "METHOD"): 6112 alter_set.set("access_method", self._parse_field()) 6113 elif self._match_text_seq("TABLESPACE"): 6114 alter_set.set("tablespace", self._parse_field()) 6115 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6116 alter_set.set("file_format", [self._parse_field()]) 6117 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6118 alter_set.set("file_format", self._parse_wrapped_options()) 6119 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6120 alter_set.set("copy_options", self._parse_wrapped_options()) 6121 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6122 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6123 else: 6124 if self._match_text_seq("SERDE"): 6125 alter_set.set("serde", self._parse_field()) 6126 6127 alter_set.set("expressions", [self._parse_properties()]) 6128 6129 return alter_set 6130 6131 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6132 start = self._prev 6133 6134 if not self._match(TokenType.TABLE): 6135 return self._parse_as_command(start) 6136 6137 exists = self._parse_exists() 6138 only = self._match_text_seq("ONLY") 6139 this = self._parse_table(schema=True) 6140 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6141 6142 if self._next: 6143 self._advance() 6144 6145 parser = 
self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6146 if parser: 6147 actions = ensure_list(parser(self)) 6148 options = self._parse_csv(self._parse_property) 6149 6150 if not self._curr and actions: 6151 return self.expression( 6152 exp.AlterTable, 6153 this=this, 6154 exists=exists, 6155 actions=actions, 6156 only=only, 6157 options=options, 6158 cluster=cluster, 6159 ) 6160 6161 return self._parse_as_command(start) 6162 6163 def _parse_merge(self) -> exp.Merge: 6164 self._match(TokenType.INTO) 6165 target = self._parse_table() 6166 6167 if target and self._match(TokenType.ALIAS, advance=False): 6168 target.set("alias", self._parse_table_alias()) 6169 6170 self._match(TokenType.USING) 6171 using = self._parse_table() 6172 6173 self._match(TokenType.ON) 6174 on = self._parse_conjunction() 6175 6176 return self.expression( 6177 exp.Merge, 6178 this=target, 6179 using=using, 6180 on=on, 6181 expressions=self._parse_when_matched(), 6182 ) 6183 6184 def _parse_when_matched(self) -> t.List[exp.When]: 6185 whens = [] 6186 6187 while self._match(TokenType.WHEN): 6188 matched = not self._match(TokenType.NOT) 6189 self._match_text_seq("MATCHED") 6190 source = ( 6191 False 6192 if self._match_text_seq("BY", "TARGET") 6193 else self._match_text_seq("BY", "SOURCE") 6194 ) 6195 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6196 6197 self._match(TokenType.THEN) 6198 6199 if self._match(TokenType.INSERT): 6200 _this = self._parse_star() 6201 if _this: 6202 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6203 else: 6204 then = self.expression( 6205 exp.Insert, 6206 this=self._parse_value(), 6207 expression=self._match_text_seq("VALUES") and self._parse_value(), 6208 ) 6209 elif self._match(TokenType.UPDATE): 6210 expressions = self._parse_star() 6211 if expressions: 6212 then = self.expression(exp.Update, expressions=expressions) 6213 else: 6214 then = self.expression( 6215 exp.Update, 6216 expressions=self._match(TokenType.SET) 6217 and self._parse_csv(self._parse_equality), 6218 ) 6219 elif self._match(TokenType.DELETE): 6220 then = self.expression(exp.Var, this=self._prev.text) 6221 else: 6222 then = None 6223 6224 whens.append( 6225 self.expression( 6226 exp.When, 6227 matched=matched, 6228 source=source, 6229 condition=condition, 6230 then=then, 6231 ) 6232 ) 6233 return whens 6234 6235 def _parse_show(self) -> t.Optional[exp.Expression]: 6236 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6237 if parser: 6238 return parser(self) 6239 return self._parse_as_command(self._prev) 6240 6241 def _parse_set_item_assignment( 6242 self, kind: t.Optional[str] = None 6243 ) -> t.Optional[exp.Expression]: 6244 index = self._index 6245 6246 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6247 return self._parse_set_transaction(global_=kind == "GLOBAL") 6248 6249 left = self._parse_primary() or self._parse_column() 6250 assignment_delimiter = self._match_texts(("=", "TO")) 6251 6252 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6253 self._retreat(index) 6254 return None 6255 6256 right = self._parse_statement() or self._parse_id_var() 6257 if isinstance(right, (exp.Column, exp.Identifier)): 6258 right = exp.var(right.name) 6259 6260 this = self.expression(exp.EQ, this=left, expression=right) 6261 return self.expression(exp.SetItem, this=this, kind=kind) 6262 6263 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6264 
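# Parses SET [GLOBAL | SESSION] TRANSACTION <characteristic, ...>, e.g.
# SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED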
self._match_text_seq("TRANSACTION") 6265 characteristics = self._parse_csv( 6266 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6267 ) 6268 return self.expression( 6269 exp.SetItem, 6270 expressions=characteristics, 6271 kind="TRANSACTION", 6272 **{"global": global_}, # type: ignore 6273 ) 6274 6275 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6276 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6277 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6278 6279 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6280 index = self._index 6281 set_ = self.expression( 6282 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6283 ) 6284 6285 if self._curr: 6286 self._retreat(index) 6287 return self._parse_as_command(self._prev) 6288 6289 return set_ 6290 6291 def _parse_var_from_options( 6292 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6293 ) -> t.Optional[exp.Var]: 6294 start = self._curr 6295 if not start: 6296 return None 6297 6298 option = start.text.upper() 6299 continuations = options.get(option) 6300 6301 index = self._index 6302 self._advance() 6303 for keywords in continuations or []: 6304 if isinstance(keywords, str): 6305 keywords = (keywords,) 6306 6307 if self._match_text_seq(*keywords): 6308 option = f"{option} {' '.join(keywords)}" 6309 break 6310 else: 6311 if continuations or continuations is None: 6312 if raise_unmatched: 6313 self.raise_error(f"Unknown option {option}") 6314 6315 self._retreat(index) 6316 return None 6317 6318 return exp.var(option) 6319 6320 def _parse_as_command(self, start: Token) -> exp.Command: 6321 while self._curr: 6322 self._advance() 6323 text = self._find_sql(start, self._prev) 6324 size = len(start.text) 6325 self._warn_unsupported() 6326 return exp.Command(this=text[:size], expression=text[size:]) 6327 6328 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6329 settings = [] 6330 6331 self._match_l_paren() 6332 kind = self._parse_id_var() 6333 6334 if self._match(TokenType.L_PAREN): 6335 while True: 6336 key = self._parse_id_var() 6337 value = self._parse_primary() 6338 6339 if not key and value is None: 6340 break 6341 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6342 self._match(TokenType.R_PAREN) 6343 6344 self._match_r_paren() 6345 6346 return self.expression( 6347 exp.DictProperty, 6348 this=this, 6349 kind=kind.this if kind else None, 6350 settings=settings, 6351 ) 6352 6353 def _parse_dict_range(self, this: str) -> exp.DictRange: 6354 self._match_l_paren() 6355 has_min = self._match_text_seq("MIN") 6356 if has_min: 6357 min = self._parse_var() or self._parse_primary() 6358 self._match_text_seq("MAX") 6359 max = self._parse_var() or self._parse_primary() 6360 else: 6361 max = self._parse_var() or self._parse_primary() 6362 min = exp.Literal.number(0) 6363 self._match_r_paren() 6364 return self.expression(exp.DictRange, this=this, min=min, max=max) 6365 6366 def _parse_comprehension( 6367 self, this: t.Optional[exp.Expression] 6368 ) -> t.Optional[exp.Comprehension]: 6369 index = self._index 6370 expression = self._parse_column() 6371 if not self._match(TokenType.IN): 6372 self._retreat(index - 1) 6373 return None 6374 iterator = self._parse_column() 6375 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6376 return self.expression( 6377 exp.Comprehension, 6378 this=this, 6379 expression=expression, 6380 
iterator=iterator, 6381 condition=condition, 6382 ) 6383 6384 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6385 if self._match(TokenType.HEREDOC_STRING): 6386 return self.expression(exp.Heredoc, this=self._prev.text) 6387 6388 if not self._match_text_seq("$"): 6389 return None 6390 6391 tags = ["$"] 6392 tag_text = None 6393 6394 if self._is_connected(): 6395 self._advance() 6396 tags.append(self._prev.text.upper()) 6397 else: 6398 self.raise_error("No closing $ found") 6399 6400 if tags[-1] != "$": 6401 if self._is_connected() and self._match_text_seq("$"): 6402 tag_text = tags[-1] 6403 tags.append("$") 6404 else: 6405 self.raise_error("No closing $ found") 6406 6407 heredoc_start = self._curr 6408 6409 while self._curr: 6410 if self._match_text_seq(*tags, advance=False): 6411 this = self._find_sql(heredoc_start, self._prev) 6412 self._advance(len(tags)) 6413 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6414 6415 self._advance() 6416 6417 self.raise_error(f"No closing {''.join(tags)} found") 6418 return None 6419 6420 def _find_parser( 6421 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6422 ) -> t.Optional[t.Callable]: 6423 if not self._curr: 6424 return None 6425 6426 index = self._index 6427 this = [] 6428 while True: 6429 # The current token might be multiple words 6430 curr = self._curr.text.upper() 6431 key = curr.split(" ") 6432 this.append(curr) 6433 6434 self._advance() 6435 result, trie = in_trie(trie, key) 6436 if result == TrieResult.FAILED: 6437 break 6438 6439 if result == TrieResult.EXISTS: 6440 subparser = parsers[" ".join(this)] 6441 return subparser 6442 6443 self._retreat(index) 6444 return None 6445 6446 def _match(self, token_type, advance=True, expression=None): 6447 if not self._curr: 6448 return None 6449 6450 if self._curr.token_type == token_type: 6451 if advance: 6452 self._advance() 6453 self._add_comments(expression) 6454 return True 6455 6456 return None 6457 6458 def _match_set(self, types, advance=True): 6459 if not self._curr: 6460 return None 6461 6462 if self._curr.token_type in types: 6463 if advance: 6464 self._advance() 6465 return True 6466 6467 return None 6468 6469 def _match_pair(self, token_type_a, token_type_b, advance=True): 6470 if not self._curr or not self._next: 6471 return None 6472 6473 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6474 if advance: 6475 self._advance(2) 6476 return True 6477 6478 return None 6479 6480 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6481 if not self._match(TokenType.L_PAREN, expression=expression): 6482 self.raise_error("Expecting (") 6483 6484 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6485 if not self._match(TokenType.R_PAREN, expression=expression): 6486 self.raise_error("Expecting )") 6487 6488 def _match_texts(self, texts, advance=True): 6489 if self._curr and self._curr.text.upper() in texts: 6490 if advance: 6491 self._advance() 6492 return True 6493 return None 6494 6495 def _match_text_seq(self, *texts, advance=True): 6496 index = self._index 6497 for text in texts: 6498 if self._curr and self._curr.text.upper() == text: 6499 self._advance() 6500 else: 6501 self._retreat(index) 6502 return None 6503 6504 if not advance: 6505 self._retreat(index) 6506 6507 return True 6508 6509 def _replace_lambda( 6510 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6511 ) -> t.Optional[exp.Expression]: 6512 if not node: 6513 return node 6514 6515 
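# Map each lambda parameter name to its parsed type, or False when the parameter
# is untyped; column references whose first part names a parameter are rewritten
# below, wrapped in a Cast when a type is known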
lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6516 6517 for column in node.find_all(exp.Column): 6518 typ = lambda_types.get(column.parts[0].name) 6519 if typ is not None: 6520 dot_or_id = column.to_dot() if column.table else column.this 6521 6522 if typ: 6523 dot_or_id = self.expression( 6524 exp.Cast, 6525 this=dot_or_id, 6526 to=typ, 6527 ) 6528 6529 parent = column.parent 6530 6531 while isinstance(parent, exp.Dot): 6532 if not isinstance(parent.parent, exp.Dot): 6533 parent.replace(dot_or_id) 6534 break 6535 parent = parent.parent 6536 else: 6537 if column is node: 6538 node = dot_or_id 6539 else: 6540 column.replace(dot_or_id) 6541 return node 6542 6543 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6544 start = self._prev 6545 6546 # Not to be confused with TRUNCATE(number, decimals) function call 6547 if self._match(TokenType.L_PAREN): 6548 self._retreat(self._index - 2) 6549 return self._parse_function() 6550 6551 # Clickhouse supports TRUNCATE DATABASE as well 6552 is_database = self._match(TokenType.DATABASE) 6553 6554 self._match(TokenType.TABLE) 6555 6556 exists = self._parse_exists(not_=False) 6557 6558 expressions = self._parse_csv( 6559 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6560 ) 6561 6562 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6563 6564 if self._match_text_seq("RESTART", "IDENTITY"): 6565 identity = "RESTART" 6566 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6567 identity = "CONTINUE" 6568 else: 6569 identity = None 6570 6571 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6572 option = self._prev.text 6573 else: 6574 option = None 6575 6576 partition = self._parse_partition() 6577 6578 # Fallback case 6579 if self._curr: 6580 return self._parse_as_command(start) 6581 6582 return self.expression( 6583 exp.TruncateTable, 6584 expressions=expressions, 6585 is_database=is_database, 6586 exists=exists, 6587 cluster=cluster, 6588 identity=identity, 6589 option=option, 6590 partition=partition, 6591 ) 6592 6593 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6594 this = self._parse_ordered(self._parse_opclass) 6595 6596 if not self._match(TokenType.WITH): 6597 return this 6598 6599 op = self._parse_var(any_token=True) 6600 6601 return self.expression(exp.WithOperator, this=this, op=op) 6602 6603 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6604 opts = [] 6605 self._match(TokenType.EQ) 6606 self._match(TokenType.L_PAREN) 6607 while self._curr and not self._match(TokenType.R_PAREN): 6608 opts.append(self._parse_conjunction()) 6609 self._match(TokenType.COMMA) 6610 return opts 6611 6612 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6613 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6614 6615 options = [] 6616 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6617 option = self._parse_unquoted_field() 6618 value = None 6619 6620 # Some options are defined as functions with the values as params 6621 if not isinstance(option, exp.Func): 6622 prev = self._prev.text.upper() 6623 # Different dialects might separate options and values by white space, "=" and "AS" 6624 self._match(TokenType.EQ) 6625 self._match(TokenType.ALIAS) 6626 6627 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6628 # Snowflake FILE_FORMAT case 6629 value = self._parse_wrapped_options() 6630 else: 6631 value = self._parse_unquoted_field() 6632 
6633 param = self.expression(exp.CopyParameter, this=option, expression=value) 6634 options.append(param) 6635 6636 if sep: 6637 self._match(sep) 6638 6639 return options 6640 6641 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6642 expr = self.expression(exp.Credentials) 6643 6644 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6645 expr.set("storage", self._parse_conjunction()) 6646 if self._match_text_seq("CREDENTIALS"): 6647 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6648 creds = ( 6649 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6650 ) 6651 expr.set("credentials", creds) 6652 if self._match_text_seq("ENCRYPTION"): 6653 expr.set("encryption", self._parse_wrapped_options()) 6654 if self._match_text_seq("IAM_ROLE"): 6655 expr.set("iam_role", self._parse_field()) 6656 if self._match_text_seq("REGION"): 6657 expr.set("region", self._parse_field()) 6658 6659 return expr 6660 6661 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6662 return self._parse_field() 6663 6664 def _parse_copy(self) -> exp.Copy | exp.Command: 6665 start = self._prev 6666 6667 self._match(TokenType.INTO) 6668 6669 this = ( 6670 self._parse_conjunction() 6671 if self._match(TokenType.L_PAREN, advance=False) 6672 else self._parse_table(schema=True) 6673 ) 6674 6675 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6676 6677 files = self._parse_csv(self._parse_file_location) 6678 credentials = self._parse_credentials() 6679 6680 self._match_text_seq("WITH") 6681 6682 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6683 6684 # Fallback case 6685 if self._curr: 6686 return self._parse_as_command(start) 6687 6688 return self.expression( 6689 exp.Copy, 6690 this=this, 6691 kind=kind, 6692 credentials=credentials, 6693 files=files, 6694 params=params, 6695 )
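The usage sketches below are editorial additions rather than part of the module source; they show how a few of the private helpers above surface through sqlglot's public API. The printed results are indicative and may differ slightly across sqlglot versions.

_parse_decode: the search/result variant of DECODE is rewritten into a CASE expression, including the explicit IS NULL test called out in its docstring:

import sqlglot

sql = "SELECT DECODE(x, 1, 'one', NULL, 'missing', 'other') FROM t"
print(sqlglot.transpile(sql, read="oracle")[0])
# SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END FROM t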
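_parse_window: the two placements of IGNORE NULLS described in the comments above are normalized into one tree shape, an exp.IgnoreNulls node wrapping the aggregate:

import sqlglot

a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t", read="snowflake")
b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t", read="snowflake")
print(a == b)  # True: both forms produce the same syntax tree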
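_parse_string_agg: WITHIN GROUP (ORDER BY ...) is folded into the exp.GroupConcat node itself, as the comments above explain, so the call transpiles directly to GROUP_CONCAT dialects:

import sqlglot

sql = "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t"
print(sqlglot.transpile(sql, read="tsql", write="mysql")[0])
# SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t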
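_parse_bracket: braces become a DuckDB-style struct literal, while subscript indices are normalized by the dialect's INDEX_OFFSET, which is what lets 1-based and 0-based dialects round-trip:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT {'a': 1} AS s, arr[1] AS first_el", read="duckdb")
print(ast.find(exp.Struct).sql("duckdb"))  # {'a': 1}
print(ast.find(exp.Bracket).sql("spark"))  # arr[0], since Spark indexes from 0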
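_parse_cast: a temporal cast with a FORMAT clause is parsed into exp.StrToDate / exp.StrToTime, with the format string translated through the dialect's FORMAT_MAPPING; this sketch assumes the Teradata dialect, which supports the clause:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD') FROM t", read="teradata")
print(ast.find(exp.StrToDate).sql("mysql"))  # STR_TO_DATE(x, '%Y-%m-%d')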
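_parse_when_matched: every WHEN [NOT] MATCHED branch of a MERGE becomes an exp.When node collected on the exp.Merge expression:

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
print([when.args.get("matched") for when in merge.find_all(exp.When)])  # [True, False]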
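_parse_truncate_table: a one-token lookahead on "(" distinguishes the TRUNCATE statement from a TRUNCATE(number, decimals) function call:

import sqlglot

print(type(sqlglot.parse_one("TRUNCATE TABLE t")).__name__)         # TruncateTable
print(type(sqlglot.parse_one("SELECT TRUNCATE(1.5, 0)")).__name__)  # Select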
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LOWCARDINALITY, 198 TokenType.MAP, 199 TokenType.NULLABLE, 200 *STRUCT_TYPE_TOKENS, 201 } 202 203 ENUM_TYPE_TOKENS = { 204 TokenType.ENUM, 205 TokenType.ENUM8, 206 TokenType.ENUM16, 207 } 208 209 AGGREGATE_TYPE_TOKENS = { 210 TokenType.AGGREGATEFUNCTION, 211 TokenType.SIMPLEAGGREGATEFUNCTION, 212 } 213 214 TYPE_TOKENS = { 215 TokenType.BIT, 216 TokenType.BOOLEAN, 217 TokenType.TINYINT, 218 TokenType.UTINYINT, 219 TokenType.SMALLINT, 220 TokenType.USMALLINT, 221 TokenType.INT, 222 TokenType.UINT, 223 TokenType.BIGINT, 224 TokenType.UBIGINT, 225 TokenType.INT128, 226 TokenType.UINT128, 227 TokenType.INT256, 228 TokenType.UINT256, 229 TokenType.MEDIUMINT, 230 TokenType.UMEDIUMINT, 231 TokenType.FIXEDSTRING, 232 TokenType.FLOAT, 233 TokenType.DOUBLE, 234 TokenType.CHAR, 235 TokenType.NCHAR, 236 TokenType.VARCHAR, 237 TokenType.NVARCHAR, 238 TokenType.BPCHAR, 239 TokenType.TEXT, 240 
TokenType.MEDIUMTEXT, 241 TokenType.LONGTEXT, 242 TokenType.MEDIUMBLOB, 243 TokenType.LONGBLOB, 244 TokenType.BINARY, 245 TokenType.VARBINARY, 246 TokenType.JSON, 247 TokenType.JSONB, 248 TokenType.INTERVAL, 249 TokenType.TINYBLOB, 250 TokenType.TINYTEXT, 251 TokenType.TIME, 252 TokenType.TIMETZ, 253 TokenType.TIMESTAMP, 254 TokenType.TIMESTAMP_S, 255 TokenType.TIMESTAMP_MS, 256 TokenType.TIMESTAMP_NS, 257 TokenType.TIMESTAMPTZ, 258 TokenType.TIMESTAMPLTZ, 259 TokenType.TIMESTAMPNTZ, 260 TokenType.DATETIME, 261 TokenType.DATETIME64, 262 TokenType.DATE, 263 TokenType.DATE32, 264 TokenType.INT4RANGE, 265 TokenType.INT4MULTIRANGE, 266 TokenType.INT8RANGE, 267 TokenType.INT8MULTIRANGE, 268 TokenType.NUMRANGE, 269 TokenType.NUMMULTIRANGE, 270 TokenType.TSRANGE, 271 TokenType.TSMULTIRANGE, 272 TokenType.TSTZRANGE, 273 TokenType.TSTZMULTIRANGE, 274 TokenType.DATERANGE, 275 TokenType.DATEMULTIRANGE, 276 TokenType.DECIMAL, 277 TokenType.UDECIMAL, 278 TokenType.BIGDECIMAL, 279 TokenType.UUID, 280 TokenType.GEOGRAPHY, 281 TokenType.GEOMETRY, 282 TokenType.HLLSKETCH, 283 TokenType.HSTORE, 284 TokenType.PSEUDO_TYPE, 285 TokenType.SUPER, 286 TokenType.SERIAL, 287 TokenType.SMALLSERIAL, 288 TokenType.BIGSERIAL, 289 TokenType.XML, 290 TokenType.YEAR, 291 TokenType.UNIQUEIDENTIFIER, 292 TokenType.USERDEFINED, 293 TokenType.MONEY, 294 TokenType.SMALLMONEY, 295 TokenType.ROWVERSION, 296 TokenType.IMAGE, 297 TokenType.VARIANT, 298 TokenType.OBJECT, 299 TokenType.OBJECT_IDENTIFIER, 300 TokenType.INET, 301 TokenType.IPADDRESS, 302 TokenType.IPPREFIX, 303 TokenType.IPV4, 304 TokenType.IPV6, 305 TokenType.UNKNOWN, 306 TokenType.NULL, 307 TokenType.NAME, 308 TokenType.TDIGEST, 309 *ENUM_TYPE_TOKENS, 310 *NESTED_TYPE_TOKENS, 311 *AGGREGATE_TYPE_TOKENS, 312 } 313 314 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 315 TokenType.BIGINT: TokenType.UBIGINT, 316 TokenType.INT: TokenType.UINT, 317 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 318 TokenType.SMALLINT: TokenType.USMALLINT, 319 TokenType.TINYINT: TokenType.UTINYINT, 320 TokenType.DECIMAL: TokenType.UDECIMAL, 321 } 322 323 SUBQUERY_PREDICATES = { 324 TokenType.ANY: exp.Any, 325 TokenType.ALL: exp.All, 326 TokenType.EXISTS: exp.Exists, 327 TokenType.SOME: exp.Any, 328 } 329 330 RESERVED_TOKENS = { 331 *Tokenizer.SINGLE_TOKENS.values(), 332 TokenType.SELECT, 333 } - {TokenType.IDENTIFIER} 334 335 DB_CREATABLES = { 336 TokenType.DATABASE, 337 TokenType.DICTIONARY, 338 TokenType.MODEL, 339 TokenType.SCHEMA, 340 TokenType.SEQUENCE, 341 TokenType.STORAGE_INTEGRATION, 342 TokenType.TABLE, 343 TokenType.TAG, 344 TokenType.VIEW, 345 TokenType.WAREHOUSE, 346 TokenType.STREAMLIT, 347 } 348 349 CREATABLES = { 350 TokenType.COLUMN, 351 TokenType.CONSTRAINT, 352 TokenType.FOREIGN_KEY, 353 TokenType.FUNCTION, 354 TokenType.INDEX, 355 TokenType.PROCEDURE, 356 *DB_CREATABLES, 357 } 358 359 # Tokens that can represent identifiers 360 ID_VAR_TOKENS = { 361 TokenType.VAR, 362 TokenType.ANTI, 363 TokenType.APPLY, 364 TokenType.ASC, 365 TokenType.ASOF, 366 TokenType.AUTO_INCREMENT, 367 TokenType.BEGIN, 368 TokenType.BPCHAR, 369 TokenType.CACHE, 370 TokenType.CASE, 371 TokenType.COLLATE, 372 TokenType.COMMAND, 373 TokenType.COMMENT, 374 TokenType.COMMIT, 375 TokenType.CONSTRAINT, 376 TokenType.COPY, 377 TokenType.DEFAULT, 378 TokenType.DELETE, 379 TokenType.DESC, 380 TokenType.DESCRIBE, 381 TokenType.DICTIONARY, 382 TokenType.DIV, 383 TokenType.END, 384 TokenType.EXECUTE, 385 TokenType.ESCAPE, 386 TokenType.FALSE, 387 TokenType.FIRST, 388 TokenType.FILTER, 389 TokenType.FINAL, 390 TokenType.FORMAT, 
391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION = { 509 TokenType.AND: exp.And, 510 TokenType.OR: exp.Or, 511 } 512 513 EQUALITY = { 514 TokenType.EQ: exp.EQ, 515 TokenType.NEQ: exp.NEQ, 516 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 517 } 518 519 COMPARISON = { 520 TokenType.GT: exp.GT, 521 TokenType.GTE: exp.GTE, 522 TokenType.LT: exp.LT, 523 TokenType.LTE: exp.LTE, 524 } 525 526 BITWISE = { 527 TokenType.AMP: exp.BitwiseAnd, 528 TokenType.CARET: exp.BitwiseXor, 529 TokenType.PIPE: exp.BitwiseOr, 530 } 531 532 TERM = { 533 TokenType.DASH: exp.Sub, 534 TokenType.PLUS: exp.Add, 535 TokenType.MOD: exp.Mod, 536 TokenType.COLLATE: exp.Collate, 537 } 538 539 FACTOR = { 540 TokenType.DIV: exp.IntDiv, 541 TokenType.LR_ARROW: exp.Distance, 542 TokenType.SLASH: exp.Div, 543 TokenType.STAR: exp.Mul, 544 } 545 546 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 547 548 TIMES = { 549 TokenType.TIME, 550 TokenType.TIMETZ, 551 } 552 553 TIMESTAMPS = { 554 TokenType.TIMESTAMP, 555 

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
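
    # Illustrative note (not part of the original source): each COLUMN_OPERATORS
    # entry turns a postfix operator on a column into an expression node, e.g.
    #
    #     x::int        -> exp.Cast (or exp.TryCast when STRICT_CAST is False)
    #     doc -> '$.a'  -> exp.JSONExtract
    #     doc ->> '$.a' -> exp.JSONExtractScalar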

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
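
    # Illustrative note (not part of the original source): RANGE_PARSERS handles
    # predicates that sit between two operands, so
    #
    #     a LIKE '%x%'      -> exp.Like(this=a, expression='%x%')
    #     a BETWEEN 1 AND 2 -> exp.Between via _parse_between
    #     a IN (1, 2)       -> exp.In via _parse_in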

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
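
    # Illustrative note (not part of the original source): these parsers are
    # keyed by the keyword that introduces a property in DDL, e.g. in
    #
    #     CREATE TABLE t (x INT) ENGINE=InnoDB COMMENT='demo'
    #
    # ENGINE dispatches to _parse_property_assignment(exp.EngineProperty) and
    # COMMENT to _parse_property_assignment(exp.SchemaCommentProperty).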

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
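
    # Illustrative note (not part of the original source): FUNCTION_PARSERS covers
    # functions whose arguments are not a plain comma-separated list and thus need
    # bespoke parsing, e.g. CAST(x AS INT), EXTRACT(YEAR FROM d) and
    # TRIM(BOTH 'x' FROM y), as opposed to ordinary calls like ABS(x).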

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: (
            "sample",
            self._parse_table_sample(as_modifier=True),
        ),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
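
    # Illustrative note (not part of the original source): a minimal sketch of
    # driving this class directly, assuming Parser and Tokenizer are re-exported
    # at the package root; most users go through sqlglot.parse_one instead.
    #
    #     from sqlglot import Parser, Tokenizer
    #     from sqlglot.errors import ErrorLevel
    #
    #     parser = Parser(error_level=ErrorLevel.RAISE)
    #     tokens = Tokenizer().tokenize("SELECT 1")
    #     expressions = parser.parse(tokens, sql="SELECT 1")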

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
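
    # Illustrative note (not part of the original source): parse_into backs the
    # `into` argument of the top-level API, e.g.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     table = sqlglot.parse_one("db.schema.tbl", into=exp.Table)
    #
    # which tries the registered EXPRESSION_PARSERS entry for exp.Table.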

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )
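
    # Illustrative note (not part of the original source): _advance/_retreat are
    # the cursor primitives behind the parser's backtracking, typically used as
    #
    #     index = self._index      # remember the current position
    #     ...                      # speculatively consume tokens
    #     self._retreat(index)     # and roll back if the speculation fails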

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims
        to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
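
    # Illustrative note (not part of the original source): the kind of ClickHouse
    # clause _parse_ttl is meant to handle (see the link above) looks like
    #
    #     TTL d + INTERVAL 1 MONTH DELETE,
    #         d + INTERVAL 1 WEEK TO VOLUME 'aaa'
    #
    # where each comma-separated item becomes an exp.MergeTreeTTLAction.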

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
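
    # Illustrative note (not part of the original source): a statement such as
    #
    #     DROP TABLE IF EXISTS db.t CASCADE
    #
    # yields exp.Drop(kind='TABLE', exists=True, cascade=True) with db.t as `this`.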

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
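
    # Illustrative note (not part of the original source): the CLONE_KEYWORDS
    # branch above covers Snowflake-style zero-copy clones, e.g.
    #
    #     CREATE TABLE t2 CLONE t1
    #
    # which attaches an exp.Clone node to the resulting exp.Create expression.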

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field
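
    # Illustrative note (not part of the original source): the options consumed
    # by _parse_sequence_properties come from DDL such as
    #
    #     CREATE SEQUENCE seq START WITH 1 INCREMENT BY 2 MINVALUE 0 NOCYCLE
    #
    # where NOCYCLE falls through to the CREATE_SEQUENCE keyword options.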

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
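
    # Illustrative note (not part of the original source): the T-SQL clause this
    # targets looks like
    #
    #     WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.t_history,
    #                                   HISTORY_RETENTION_PERIOD = 3 MONTHS))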

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )
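
    # Illustrative note (not part of the original source): _parse_clustered_by
    # targets Hive-style bucketing clauses such as
    #
    #     CLUSTERED BY (id) SORTED BY (ts DESC) INTO 32 BUCKETS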

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
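
    # Illustrative note (not part of the original source): _parse_locking handles
    # Teradata locking modifiers such as
    #
    #     LOCKING TABLE t FOR ACCESS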
return self._parse_bitwise() 2197 2198 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2199 expression = None 2200 from_expressions = None 2201 to_expressions = None 2202 2203 if self._match(TokenType.IN): 2204 this = self._parse_wrapped_csv(self._parse_bitwise) 2205 elif self._match(TokenType.FROM): 2206 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2207 self._match_text_seq("TO") 2208 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2209 elif self._match_text_seq("WITH", "(", "MODULUS"): 2210 this = self._parse_number() 2211 self._match_text_seq(",", "REMAINDER") 2212 expression = self._parse_number() 2213 self._match_r_paren() 2214 else: 2215 self.raise_error("Failed to parse partition bound spec.") 2216 2217 return self.expression( 2218 exp.PartitionBoundSpec, 2219 this=this, 2220 expression=expression, 2221 from_expressions=from_expressions, 2222 to_expressions=to_expressions, 2223 ) 2224 2225 # https://www.postgresql.org/docs/current/sql-createtable.html 2226 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2227 if not self._match_text_seq("OF"): 2228 self._retreat(self._index - 1) 2229 return None 2230 2231 this = self._parse_table(schema=True) 2232 2233 if self._match(TokenType.DEFAULT): 2234 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2235 elif self._match_text_seq("FOR", "VALUES"): 2236 expression = self._parse_partition_bound_spec() 2237 else: 2238 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2239 2240 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2241 2242 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2243 self._match(TokenType.EQ) 2244 return self.expression( 2245 exp.PartitionedByProperty, 2246 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2247 ) 2248 2249 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2250 if self._match_text_seq("AND", "STATISTICS"): 2251 statistics = True 2252 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2253 statistics = False 2254 else: 2255 statistics = None 2256 2257 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2258 2259 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2260 if self._match_text_seq("SQL"): 2261 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2262 return None 2263 2264 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2265 if self._match_text_seq("SQL", "DATA"): 2266 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2267 return None 2268 2269 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2270 if self._match_text_seq("PRIMARY", "INDEX"): 2271 return exp.NoPrimaryIndexProperty() 2272 if self._match_text_seq("SQL"): 2273 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2274 return None 2275 2276 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2277 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2278 return exp.OnCommitProperty() 2279 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2280 return exp.OnCommitProperty(delete=True) 2281 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2282 2283 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2284 if self._match_text_seq("SQL", "DATA"): 2285 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2286 
return None 2287 2288 def _parse_distkey(self) -> exp.DistKeyProperty: 2289 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2290 2291 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2292 table = self._parse_table(schema=True) 2293 2294 options = [] 2295 while self._match_texts(("INCLUDING", "EXCLUDING")): 2296 this = self._prev.text.upper() 2297 2298 id_var = self._parse_id_var() 2299 if not id_var: 2300 return None 2301 2302 options.append( 2303 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2304 ) 2305 2306 return self.expression(exp.LikeProperty, this=table, expressions=options) 2307 2308 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2309 return self.expression( 2310 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2311 ) 2312 2313 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2314 self._match(TokenType.EQ) 2315 return self.expression( 2316 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2317 ) 2318 2319 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2320 self._match_text_seq("WITH", "CONNECTION") 2321 return self.expression( 2322 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2323 ) 2324 2325 def _parse_returns(self) -> exp.ReturnsProperty: 2326 value: t.Optional[exp.Expression] 2327 null = None 2328 is_table = self._match(TokenType.TABLE) 2329 2330 if is_table: 2331 if self._match(TokenType.LT): 2332 value = self.expression( 2333 exp.Schema, 2334 this="TABLE", 2335 expressions=self._parse_csv(self._parse_struct_types), 2336 ) 2337 if not self._match(TokenType.GT): 2338 self.raise_error("Expecting >") 2339 else: 2340 value = self._parse_schema(exp.var("TABLE")) 2341 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2342 null = True 2343 value = None 2344 else: 2345 value = self._parse_types() 2346 2347 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2348 2349 def _parse_describe(self) -> exp.Describe: 2350 kind = self._match_set(self.CREATABLES) and self._prev.text 2351 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2352 if self._match(TokenType.DOT): 2353 style = None 2354 self._retreat(self._index - 2) 2355 this = self._parse_table(schema=True) 2356 properties = self._parse_properties() 2357 expressions = properties.expressions if properties else None 2358 return self.expression( 2359 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2360 ) 2361 2362 def _parse_insert(self) -> exp.Insert: 2363 comments = ensure_list(self._prev_comments) 2364 hint = self._parse_hint() 2365 overwrite = self._match(TokenType.OVERWRITE) 2366 ignore = self._match(TokenType.IGNORE) 2367 local = self._match_text_seq("LOCAL") 2368 alternative = None 2369 is_function = None 2370 2371 if self._match_text_seq("DIRECTORY"): 2372 this: t.Optional[exp.Expression] = self.expression( 2373 exp.Directory, 2374 this=self._parse_var_or_string(), 2375 local=local, 2376 row_format=self._parse_row_format(match_row=True), 2377 ) 2378 else: 2379 if self._match(TokenType.OR): 2380 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2381 2382 self._match(TokenType.INTO) 2383 comments += ensure_list(self._prev_comments) 2384 self._match(TokenType.TABLE) 2385 is_function = self._match(TokenType.FUNCTION) 2386 2387 this = ( 2388 
self._parse_table(schema=True, parse_partition=True) 2389 if not is_function 2390 else self._parse_function() 2391 ) 2392 2393 returning = self._parse_returning() 2394 2395 return self.expression( 2396 exp.Insert, 2397 comments=comments, 2398 hint=hint, 2399 is_function=is_function, 2400 this=this, 2401 stored=self._match_text_seq("STORED") and self._parse_stored(), 2402 by_name=self._match_text_seq("BY", "NAME"), 2403 exists=self._parse_exists(), 2404 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2405 and self._parse_conjunction(), 2406 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2407 conflict=self._parse_on_conflict(), 2408 returning=returning or self._parse_returning(), 2409 overwrite=overwrite, 2410 alternative=alternative, 2411 ignore=ignore, 2412 ) 2413 2414 def _parse_kill(self) -> exp.Kill: 2415 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2416 2417 return self.expression( 2418 exp.Kill, 2419 this=self._parse_primary(), 2420 kind=kind, 2421 ) 2422 2423 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2424 conflict = self._match_text_seq("ON", "CONFLICT") 2425 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2426 2427 if not conflict and not duplicate: 2428 return None 2429 2430 conflict_keys = None 2431 constraint = None 2432 2433 if conflict: 2434 if self._match_text_seq("ON", "CONSTRAINT"): 2435 constraint = self._parse_id_var() 2436 elif self._match(TokenType.L_PAREN): 2437 conflict_keys = self._parse_csv(self._parse_id_var) 2438 self._match_r_paren() 2439 2440 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2441 if self._prev.token_type == TokenType.UPDATE: 2442 self._match(TokenType.SET) 2443 expressions = self._parse_csv(self._parse_equality) 2444 else: 2445 expressions = None 2446 2447 return self.expression( 2448 exp.OnConflict, 2449 duplicate=duplicate, 2450 expressions=expressions, 2451 action=action, 2452 conflict_keys=conflict_keys, 2453 constraint=constraint, 2454 ) 2455 2456 def _parse_returning(self) -> t.Optional[exp.Returning]: 2457 if not self._match(TokenType.RETURNING): 2458 return None 2459 return self.expression( 2460 exp.Returning, 2461 expressions=self._parse_csv(self._parse_expression), 2462 into=self._match(TokenType.INTO) and self._parse_table_part(), 2463 ) 2464 2465 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2466 if not self._match(TokenType.FORMAT): 2467 return None 2468 return self._parse_row_format() 2469 2470 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2471 index = self._index 2472 with_ = with_ or self._match_text_seq("WITH") 2473 2474 if not self._match(TokenType.SERDE_PROPERTIES): 2475 self._retreat(index) 2476 return None 2477 return self.expression( 2478 exp.SerdeProperties, 2479 **{ # type: ignore 2480 "expressions": self._parse_wrapped_properties(), 2481 "with": with_, 2482 }, 2483 ) 2484 2485 def _parse_row_format( 2486 self, match_row: bool = False 2487 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2488 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2489 return None 2490 2491 if self._match_text_seq("SERDE"): 2492 this = self._parse_string() 2493 2494 serde_properties = self._parse_serde_properties() 2495 2496 return self.expression( 2497 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2498 ) 2499 2500 self._match_text_seq("DELIMITED") 2501 
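# Hive-style DELIMITED row format: each optional clause below maps onto a
# kwarg of exp.RowFormatDelimitedProperty. For instance,
#   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
# populates fields=',' and lines='\n'.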
2502 kwargs = {} 2503 2504 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2505 kwargs["fields"] = self._parse_string() 2506 if self._match_text_seq("ESCAPED", "BY"): 2507 kwargs["escaped"] = self._parse_string() 2508 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2509 kwargs["collection_items"] = self._parse_string() 2510 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2511 kwargs["map_keys"] = self._parse_string() 2512 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2513 kwargs["lines"] = self._parse_string() 2514 if self._match_text_seq("NULL", "DEFINED", "AS"): 2515 kwargs["null"] = self._parse_string() 2516 2517 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2518 2519 def _parse_load(self) -> exp.LoadData | exp.Command: 2520 if self._match_text_seq("DATA"): 2521 local = self._match_text_seq("LOCAL") 2522 self._match_text_seq("INPATH") 2523 inpath = self._parse_string() 2524 overwrite = self._match(TokenType.OVERWRITE) 2525 self._match_pair(TokenType.INTO, TokenType.TABLE) 2526 2527 return self.expression( 2528 exp.LoadData, 2529 this=self._parse_table(schema=True), 2530 local=local, 2531 overwrite=overwrite, 2532 inpath=inpath, 2533 partition=self._parse_partition(), 2534 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2535 serde=self._match_text_seq("SERDE") and self._parse_string(), 2536 ) 2537 return self._parse_as_command(self._prev) 2538 2539 def _parse_delete(self) -> exp.Delete: 2540 # This handles MySQL's "Multiple-Table Syntax" 2541 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2542 tables = None 2543 comments = self._prev_comments 2544 if not self._match(TokenType.FROM, advance=False): 2545 tables = self._parse_csv(self._parse_table) or None 2546 2547 returning = self._parse_returning() 2548 2549 return self.expression( 2550 exp.Delete, 2551 comments=comments, 2552 tables=tables, 2553 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2554 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2555 where=self._parse_where(), 2556 returning=returning or self._parse_returning(), 2557 limit=self._parse_limit(), 2558 ) 2559 2560 def _parse_update(self) -> exp.Update: 2561 comments = self._prev_comments 2562 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2563 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2564 returning = self._parse_returning() 2565 return self.expression( 2566 exp.Update, 2567 comments=comments, 2568 **{ # type: ignore 2569 "this": this, 2570 "expressions": expressions, 2571 "from": self._parse_from(joins=True), 2572 "where": self._parse_where(), 2573 "returning": returning or self._parse_returning(), 2574 "order": self._parse_order(), 2575 "limit": self._parse_limit(), 2576 }, 2577 ) 2578 2579 def _parse_uncache(self) -> exp.Uncache: 2580 if not self._match(TokenType.TABLE): 2581 self.raise_error("Expecting TABLE after UNCACHE") 2582 2583 return self.expression( 2584 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2585 ) 2586 2587 def _parse_cache(self) -> exp.Cache: 2588 lazy = self._match_text_seq("LAZY") 2589 self._match(TokenType.TABLE) 2590 table = self._parse_table(schema=True) 2591 2592 options = [] 2593 if self._match_text_seq("OPTIONS"): 2594 self._match_l_paren() 2595 k = self._parse_string() 2596 self._match(TokenType.EQ) 2597 v = self._parse_string() 2598 options = [k, v] 2599 self._match_r_paren() 2600 2601 
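# Covers Spark-style CACHE [LAZY] TABLE t [OPTIONS ('k' = 'v')] [[AS] SELECT ...];
# the optional AS is consumed here before parsing the source query.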
self._match(TokenType.ALIAS) 2602 return self.expression( 2603 exp.Cache, 2604 this=table, 2605 lazy=lazy, 2606 options=options, 2607 expression=self._parse_select(nested=True), 2608 ) 2609 2610 def _parse_partition(self) -> t.Optional[exp.Partition]: 2611 if not self._match(TokenType.PARTITION): 2612 return None 2613 2614 return self.expression( 2615 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2616 ) 2617 2618 def _parse_value(self) -> t.Optional[exp.Tuple]: 2619 if self._match(TokenType.L_PAREN): 2620 expressions = self._parse_csv(self._parse_expression) 2621 self._match_r_paren() 2622 return self.expression(exp.Tuple, expressions=expressions) 2623 2624 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2625 expression = self._parse_expression() 2626 if expression: 2627 return self.expression(exp.Tuple, expressions=[expression]) 2628 return None 2629 2630 def _parse_projections(self) -> t.List[exp.Expression]: 2631 return self._parse_expressions() 2632 2633 def _parse_select( 2634 self, 2635 nested: bool = False, 2636 table: bool = False, 2637 parse_subquery_alias: bool = True, 2638 parse_set_operation: bool = True, 2639 ) -> t.Optional[exp.Expression]: 2640 cte = self._parse_with() 2641 2642 if cte: 2643 this = self._parse_statement() 2644 2645 if not this: 2646 self.raise_error("Failed to parse any statement following CTE") 2647 return cte 2648 2649 if "with" in this.arg_types: 2650 this.set("with", cte) 2651 else: 2652 self.raise_error(f"{this.key} does not support CTE") 2653 this = cte 2654 2655 return this 2656 2657 # duckdb supports leading with FROM x 2658 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2659 2660 if self._match(TokenType.SELECT): 2661 comments = self._prev_comments 2662 2663 hint = self._parse_hint() 2664 all_ = self._match(TokenType.ALL) 2665 distinct = self._match_set(self.DISTINCT_TOKENS) 2666 2667 kind = ( 2668 self._match(TokenType.ALIAS) 2669 and self._match_texts(("STRUCT", "VALUE")) 2670 and self._prev.text.upper() 2671 ) 2672 2673 if distinct: 2674 distinct = self.expression( 2675 exp.Distinct, 2676 on=self._parse_value() if self._match(TokenType.ON) else None, 2677 ) 2678 2679 if all_ and distinct: 2680 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2681 2682 limit = self._parse_limit(top=True) 2683 projections = self._parse_projections() 2684 2685 this = self.expression( 2686 exp.Select, 2687 kind=kind, 2688 hint=hint, 2689 distinct=distinct, 2690 expressions=projections, 2691 limit=limit, 2692 ) 2693 this.comments = comments 2694 2695 into = self._parse_into() 2696 if into: 2697 this.set("into", into) 2698 2699 if not from_: 2700 from_ = self._parse_from() 2701 2702 if from_: 2703 this.set("from", from_) 2704 2705 this = self._parse_query_modifiers(this) 2706 elif (table or nested) and self._match(TokenType.L_PAREN): 2707 if self._match(TokenType.PIVOT): 2708 this = self._parse_simplified_pivot() 2709 elif self._match(TokenType.FROM): 2710 this = exp.select("*").from_( 2711 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2712 ) 2713 else: 2714 this = ( 2715 self._parse_table() 2716 if table 2717 else self._parse_select(nested=True, parse_set_operation=False) 2718 ) 2719 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2720 2721 self._match_r_paren() 2722 2723 # We return early here so that the UNION isn't attached to the subquery by the 2724 # following call to _parse_set_operations, but instead becomes the 
parent node 2725 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2726 elif self._match(TokenType.VALUES, advance=False): 2727 this = self._parse_derived_table_values() 2728 elif from_: 2729 this = exp.select("*").from_(from_.this, copy=False) 2730 else: 2731 this = None 2732 2733 if parse_set_operation: 2734 return self._parse_set_operations(this) 2735 return this 2736 2737 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2738 if not skip_with_token and not self._match(TokenType.WITH): 2739 return None 2740 2741 comments = self._prev_comments 2742 recursive = self._match(TokenType.RECURSIVE) 2743 2744 expressions = [] 2745 while True: 2746 expressions.append(self._parse_cte()) 2747 2748 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2749 break 2750 else: 2751 self._match(TokenType.WITH) 2752 2753 return self.expression( 2754 exp.With, comments=comments, expressions=expressions, recursive=recursive 2755 ) 2756 2757 def _parse_cte(self) -> exp.CTE: 2758 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2759 if not alias or not alias.this: 2760 self.raise_error("Expected CTE to have alias") 2761 2762 self._match(TokenType.ALIAS) 2763 2764 if self._match_text_seq("NOT", "MATERIALIZED"): 2765 materialized = False 2766 elif self._match_text_seq("MATERIALIZED"): 2767 materialized = True 2768 else: 2769 materialized = None 2770 2771 return self.expression( 2772 exp.CTE, 2773 this=self._parse_wrapped(self._parse_statement), 2774 alias=alias, 2775 materialized=materialized, 2776 ) 2777 2778 def _parse_table_alias( 2779 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2780 ) -> t.Optional[exp.TableAlias]: 2781 any_token = self._match(TokenType.ALIAS) 2782 alias = ( 2783 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2784 or self._parse_string_as_identifier() 2785 ) 2786 2787 index = self._index 2788 if self._match(TokenType.L_PAREN): 2789 columns = self._parse_csv(self._parse_function_parameter) 2790 self._match_r_paren() if columns else self._retreat(index) 2791 else: 2792 columns = None 2793 2794 if not alias and not columns: 2795 return None 2796 2797 return self.expression(exp.TableAlias, this=alias, columns=columns) 2798 2799 def _parse_subquery( 2800 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2801 ) -> t.Optional[exp.Subquery]: 2802 if not this: 2803 return None 2804 2805 return self.expression( 2806 exp.Subquery, 2807 this=this, 2808 pivots=self._parse_pivots(), 2809 alias=self._parse_table_alias() if parse_alias else None, 2810 ) 2811 2812 def _implicit_unnests_to_explicit(self, this: E) -> E: 2813 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2814 2815 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2816 for i, join in enumerate(this.args.get("joins") or []): 2817 table = join.this 2818 normalized_table = table.copy() 2819 normalized_table.meta["maybe_column"] = True 2820 normalized_table = _norm(normalized_table, dialect=self.dialect) 2821 2822 if isinstance(table, exp.Table) and not join.args.get("on"): 2823 if normalized_table.parts[0].name in refs: 2824 table_as_column = table.to_column() 2825 unnest = exp.Unnest(expressions=[table_as_column]) 2826 2827 # Table.to_column creates a parent Alias node that we want to convert to 2828 # a TableAlias and attach to the Unnest, so it matches the parser's output 2829 if isinstance(table.args.get("alias"), exp.TableAlias): 
2830 table_as_column.replace(table_as_column.this) 2831 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2832 2833 table.replace(unnest) 2834 2835 refs.add(normalized_table.alias_or_name) 2836 2837 return this 2838 2839 def _parse_query_modifiers( 2840 self, this: t.Optional[exp.Expression] 2841 ) -> t.Optional[exp.Expression]: 2842 if isinstance(this, (exp.Query, exp.Table)): 2843 for join in self._parse_joins(): 2844 this.append("joins", join) 2845 for lateral in iter(self._parse_lateral, None): 2846 this.append("laterals", lateral) 2847 2848 while True: 2849 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2850 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2851 key, expression = parser(self) 2852 2853 if expression: 2854 this.set(key, expression) 2855 if key == "limit": 2856 offset = expression.args.pop("offset", None) 2857 2858 if offset: 2859 offset = exp.Offset(expression=offset) 2860 this.set("offset", offset) 2861 2862 limit_by_expressions = expression.expressions 2863 expression.set("expressions", None) 2864 offset.set("expressions", limit_by_expressions) 2865 continue 2866 break 2867 2868 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2869 this = self._implicit_unnests_to_explicit(this) 2870 2871 return this 2872 2873 def _parse_hint(self) -> t.Optional[exp.Hint]: 2874 if self._match(TokenType.HINT): 2875 hints = [] 2876 for hint in iter( 2877 lambda: self._parse_csv( 2878 lambda: self._parse_function() or self._parse_var(upper=True) 2879 ), 2880 [], 2881 ): 2882 hints.extend(hint) 2883 2884 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2885 self.raise_error("Expected */ after HINT") 2886 2887 return self.expression(exp.Hint, expressions=hints) 2888 2889 return None 2890 2891 def _parse_into(self) -> t.Optional[exp.Into]: 2892 if not self._match(TokenType.INTO): 2893 return None 2894 2895 temp = self._match(TokenType.TEMPORARY) 2896 unlogged = self._match_text_seq("UNLOGGED") 2897 self._match(TokenType.TABLE) 2898 2899 return self.expression( 2900 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2901 ) 2902 2903 def _parse_from( 2904 self, joins: bool = False, skip_from_token: bool = False 2905 ) -> t.Optional[exp.From]: 2906 if not skip_from_token and not self._match(TokenType.FROM): 2907 return None 2908 2909 return self.expression( 2910 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2911 ) 2912 2913 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2914 return self.expression( 2915 exp.MatchRecognizeMeasure, 2916 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2917 this=self._parse_expression(), 2918 ) 2919 2920 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2921 if not self._match(TokenType.MATCH_RECOGNIZE): 2922 return None 2923 2924 self._match_l_paren() 2925 2926 partition = self._parse_partition_by() 2927 order = self._parse_order() 2928 2929 measures = ( 2930 self._parse_csv(self._parse_match_recognize_measure) 2931 if self._match_text_seq("MEASURES") 2932 else None 2933 ) 2934 2935 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2936 rows = exp.var("ONE ROW PER MATCH") 2937 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2938 text = "ALL ROWS PER MATCH" 2939 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2940 text += " SHOW EMPTY MATCHES" 2941 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2942 text += " OMIT EMPTY MATCHES" 
2943 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2944 text += " WITH UNMATCHED ROWS" 2945 rows = exp.var(text) 2946 else: 2947 rows = None 2948 2949 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2950 text = "AFTER MATCH SKIP" 2951 if self._match_text_seq("PAST", "LAST", "ROW"): 2952 text += " PAST LAST ROW" 2953 elif self._match_text_seq("TO", "NEXT", "ROW"): 2954 text += " TO NEXT ROW" 2955 elif self._match_text_seq("TO", "FIRST"): 2956 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2957 elif self._match_text_seq("TO", "LAST"): 2958 text += f" TO LAST {self._advance_any().text}" # type: ignore 2959 after = exp.var(text) 2960 else: 2961 after = None 2962 2963 if self._match_text_seq("PATTERN"): 2964 self._match_l_paren() 2965 2966 if not self._curr: 2967 self.raise_error("Expecting )", self._curr) 2968 2969 paren = 1 2970 start = self._curr 2971 2972 while self._curr and paren > 0: 2973 if self._curr.token_type == TokenType.L_PAREN: 2974 paren += 1 2975 if self._curr.token_type == TokenType.R_PAREN: 2976 paren -= 1 2977 2978 end = self._prev 2979 self._advance() 2980 2981 if paren > 0: 2982 self.raise_error("Expecting )", self._curr) 2983 2984 pattern = exp.var(self._find_sql(start, end)) 2985 else: 2986 pattern = None 2987 2988 define = ( 2989 self._parse_csv(self._parse_name_as_expression) 2990 if self._match_text_seq("DEFINE") 2991 else None 2992 ) 2993 2994 self._match_r_paren() 2995 2996 return self.expression( 2997 exp.MatchRecognize, 2998 partition_by=partition, 2999 order=order, 3000 measures=measures, 3001 rows=rows, 3002 after=after, 3003 pattern=pattern, 3004 define=define, 3005 alias=self._parse_table_alias(), 3006 ) 3007 3008 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3009 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3010 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3011 cross_apply = False 3012 3013 if cross_apply is not None: 3014 this = self._parse_select(table=True) 3015 view = None 3016 outer = None 3017 elif self._match(TokenType.LATERAL): 3018 this = self._parse_select(table=True) 3019 view = self._match(TokenType.VIEW) 3020 outer = self._match(TokenType.OUTER) 3021 else: 3022 return None 3023 3024 if not this: 3025 this = ( 3026 self._parse_unnest() 3027 or self._parse_function() 3028 or self._parse_id_var(any_token=False) 3029 ) 3030 3031 while self._match(TokenType.DOT): 3032 this = exp.Dot( 3033 this=this, 3034 expression=self._parse_function() or self._parse_id_var(any_token=False), 3035 ) 3036 3037 if view: 3038 table = self._parse_id_var(any_token=False) 3039 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3040 table_alias: t.Optional[exp.TableAlias] = self.expression( 3041 exp.TableAlias, this=table, columns=columns 3042 ) 3043 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3044 # We move the alias from the lateral's child node to the lateral itself 3045 table_alias = this.args["alias"].pop() 3046 else: 3047 table_alias = self._parse_table_alias() 3048 3049 return self.expression( 3050 exp.Lateral, 3051 this=this, 3052 view=view, 3053 outer=outer, 3054 alias=table_alias, 3055 cross_apply=cross_apply, 3056 ) 3057 3058 def _parse_join_parts( 3059 self, 3060 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3061 return ( 3062 self._match_set(self.JOIN_METHODS) and self._prev, 3063 self._match_set(self.JOIN_SIDES) and self._prev, 3064 self._match_set(self.JOIN_KINDS) and self._prev, 3065 ) 3066 
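# A minimal sketch (not part of the parser) of the Join shape _parse_join builds,
# going through sqlglot's public entry point, which drives this class:
#
#     from sqlglot import parse_one
#     select = parse_one("SELECT * FROM a LEFT JOIN b ON a.x = b.x")
#     join = select.args["joins"][0]
#     assert join.side == "LEFT" and join.args["on"] is not None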
3067 def _parse_join( 3068 self, skip_join_token: bool = False, parse_bracket: bool = False 3069 ) -> t.Optional[exp.Join]: 3070 if self._match(TokenType.COMMA): 3071 return self.expression(exp.Join, this=self._parse_table()) 3072 3073 index = self._index 3074 method, side, kind = self._parse_join_parts() 3075 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3076 join = self._match(TokenType.JOIN) 3077 3078 if not skip_join_token and not join: 3079 self._retreat(index) 3080 kind = None 3081 method = None 3082 side = None 3083 3084 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3085 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3086 3087 if not skip_join_token and not join and not outer_apply and not cross_apply: 3088 return None 3089 3090 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3091 3092 if method: 3093 kwargs["method"] = method.text 3094 if side: 3095 kwargs["side"] = side.text 3096 if kind: 3097 kwargs["kind"] = kind.text 3098 if hint: 3099 kwargs["hint"] = hint 3100 3101 if self._match(TokenType.MATCH_CONDITION): 3102 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3103 3104 if self._match(TokenType.ON): 3105 kwargs["on"] = self._parse_conjunction() 3106 elif self._match(TokenType.USING): 3107 kwargs["using"] = self._parse_wrapped_id_vars() 3108 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3109 kind and kind.token_type == TokenType.CROSS 3110 ): 3111 index = self._index 3112 joins: t.Optional[list] = list(self._parse_joins()) 3113 3114 if joins and self._match(TokenType.ON): 3115 kwargs["on"] = self._parse_conjunction() 3116 elif joins and self._match(TokenType.USING): 3117 kwargs["using"] = self._parse_wrapped_id_vars() 3118 else: 3119 joins = None 3120 self._retreat(index) 3121 3122 kwargs["this"].set("joins", joins if joins else None) 3123 3124 comments = [c for token in (method, side, kind) if token for c in token.comments] 3125 return self.expression(exp.Join, comments=comments, **kwargs) 3126 3127 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3128 this = self._parse_conjunction() 3129 3130 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3131 return this 3132 3133 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3134 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3135 3136 return this 3137 3138 def _parse_index_params(self) -> exp.IndexParameters: 3139 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3140 3141 if self._match(TokenType.L_PAREN, advance=False): 3142 columns = self._parse_wrapped_csv(self._parse_with_operator) 3143 else: 3144 columns = None 3145 3146 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3147 partition_by = self._parse_partition_by() 3148 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3149 tablespace = ( 3150 self._parse_var(any_token=True) 3151 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3152 else None 3153 ) 3154 where = self._parse_where() 3155 3156 return self.expression( 3157 exp.IndexParameters, 3158 using=using, 3159 columns=columns, 3160 include=include, 3161 partition_by=partition_by, 3162 where=where, 3163 with_storage=with_storage, 3164 tablespace=tablespace, 3165 ) 3166 3167 def _parse_index( 3168 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3169 ) -> 
t.Optional[exp.Index]: 3170 if index or anonymous: 3171 unique = None 3172 primary = None 3173 amp = None 3174 3175 self._match(TokenType.ON) 3176 self._match(TokenType.TABLE) # hive 3177 table = self._parse_table_parts(schema=True) 3178 else: 3179 unique = self._match(TokenType.UNIQUE) 3180 primary = self._match_text_seq("PRIMARY") 3181 amp = self._match_text_seq("AMP") 3182 3183 if not self._match(TokenType.INDEX): 3184 return None 3185 3186 index = self._parse_id_var() 3187 table = None 3188 3189 params = self._parse_index_params() 3190 3191 return self.expression( 3192 exp.Index, 3193 this=index, 3194 table=table, 3195 unique=unique, 3196 primary=primary, 3197 amp=amp, 3198 params=params, 3199 ) 3200 3201 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3202 hints: t.List[exp.Expression] = [] 3203 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3204 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3205 hints.append( 3206 self.expression( 3207 exp.WithTableHint, 3208 expressions=self._parse_csv( 3209 lambda: self._parse_function() or self._parse_var(any_token=True) 3210 ), 3211 ) 3212 ) 3213 self._match_r_paren() 3214 else: 3215 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3216 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3217 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3218 3219 self._match_texts(("INDEX", "KEY")) 3220 if self._match(TokenType.FOR): 3221 hint.set("target", self._advance_any() and self._prev.text.upper()) 3222 3223 hint.set("expressions", self._parse_wrapped_id_vars()) 3224 hints.append(hint) 3225 3226 return hints or None 3227 3228 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3229 return ( 3230 (not schema and self._parse_function(optional_parens=False)) 3231 or self._parse_id_var(any_token=False) 3232 or self._parse_string_as_identifier() 3233 or self._parse_placeholder() 3234 ) 3235 3236 def _parse_table_parts( 3237 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3238 ) -> exp.Table: 3239 catalog = None 3240 db = None 3241 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3242 3243 while self._match(TokenType.DOT): 3244 if catalog: 3245 # This allows nesting the table in arbitrarily many dot expressions if needed 3246 table = self.expression( 3247 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3248 ) 3249 else: 3250 catalog = db 3251 db = table 3252 # "" used for tsql FROM a..b case 3253 table = self._parse_table_part(schema=schema) or "" 3254 3255 if ( 3256 wildcard 3257 and self._is_connected() 3258 and (isinstance(table, exp.Identifier) or not table) 3259 and self._match(TokenType.STAR) 3260 ): 3261 if isinstance(table, exp.Identifier): 3262 table.args["this"] += "*" 3263 else: 3264 table = exp.Identifier(this="*") 3265 3266 # We bubble up comments from the Identifier to the Table 3267 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3268 3269 if is_db_reference: 3270 catalog = db 3271 db = table 3272 table = None 3273 3274 if not table and not is_db_reference: 3275 self.raise_error(f"Expected table name but got {self._curr}") 3276 if not db and is_db_reference: 3277 self.raise_error(f"Expected database name but got {self._curr}") 3278 3279 return self.expression( 3280 exp.Table, 3281 comments=comments, 3282 this=table, 3283 db=db, 3284 catalog=catalog, 3285 pivots=self._parse_pivots(), 3286 ) 3287 3288 
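# For illustration, the dotted-name loop above resolves parts right-to-left
# into table/db/catalog:
#
#     from sqlglot import exp, parse_one
#     table = parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     assert (table.catalog, table.db, table.name) == ("c", "d", "t")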
def _parse_table( 3289 self, 3290 schema: bool = False, 3291 joins: bool = False, 3292 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3293 parse_bracket: bool = False, 3294 is_db_reference: bool = False, 3295 parse_partition: bool = False, 3296 ) -> t.Optional[exp.Expression]: 3297 lateral = self._parse_lateral() 3298 if lateral: 3299 return lateral 3300 3301 unnest = self._parse_unnest() 3302 if unnest: 3303 return unnest 3304 3305 values = self._parse_derived_table_values() 3306 if values: 3307 return values 3308 3309 subquery = self._parse_select(table=True) 3310 if subquery: 3311 if not subquery.args.get("pivots"): 3312 subquery.set("pivots", self._parse_pivots()) 3313 return subquery 3314 3315 bracket = parse_bracket and self._parse_bracket(None) 3316 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3317 3318 only = self._match(TokenType.ONLY) 3319 3320 this = t.cast( 3321 exp.Expression, 3322 bracket 3323 or self._parse_bracket( 3324 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3325 ), 3326 ) 3327 3328 if only: 3329 this.set("only", only) 3330 3331 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3332 self._match_text_seq("*") 3333 3334 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3335 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3336 this.set("partition", self._parse_partition()) 3337 3338 if schema: 3339 return self._parse_schema(this=this) 3340 3341 version = self._parse_version() 3342 3343 if version: 3344 this.set("version", version) 3345 3346 if self.dialect.ALIAS_POST_TABLESAMPLE: 3347 table_sample = self._parse_table_sample() 3348 3349 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3350 if alias: 3351 this.set("alias", alias) 3352 3353 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3354 return self.expression( 3355 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3356 ) 3357 3358 this.set("hints", self._parse_table_hints()) 3359 3360 if not this.args.get("pivots"): 3361 this.set("pivots", self._parse_pivots()) 3362 3363 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3364 table_sample = self._parse_table_sample() 3365 3366 if table_sample: 3367 table_sample.set("this", this) 3368 this = table_sample 3369 3370 if joins: 3371 for join in self._parse_joins(): 3372 this.append("joins", join) 3373 3374 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3375 this.set("ordinality", True) 3376 this.set("alias", self._parse_table_alias()) 3377 3378 return this 3379 3380 def _parse_version(self) -> t.Optional[exp.Version]: 3381 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3382 this = "TIMESTAMP" 3383 elif self._match(TokenType.VERSION_SNAPSHOT): 3384 this = "VERSION" 3385 else: 3386 return None 3387 3388 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3389 kind = self._prev.text.upper() 3390 start = self._parse_bitwise() 3391 self._match_texts(("TO", "AND")) 3392 end = self._parse_bitwise() 3393 expression: t.Optional[exp.Expression] = self.expression( 3394 exp.Tuple, expressions=[start, end] 3395 ) 3396 elif self._match_text_seq("CONTAINED", "IN"): 3397 kind = "CONTAINED IN" 3398 expression = self.expression( 3399 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3400 ) 3401 elif self._match(TokenType.ALL): 3402 kind = "ALL" 3403 expression = None 3404 else: 3405 self._match_text_seq("AS", "OF") 3406 kind = "AS OF" 3407 
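# Handles the trailing time-travel form, e.g. FOR SYSTEM_TIME AS OF '2024-01-01'
# (SQL Server / BigQuery flavor).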
expression = self._parse_type() 3408 3409 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3410 3411 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3412 if not self._match(TokenType.UNNEST): 3413 return None 3414 3415 expressions = self._parse_wrapped_csv(self._parse_equality) 3416 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3417 3418 alias = self._parse_table_alias() if with_alias else None 3419 3420 if alias: 3421 if self.dialect.UNNEST_COLUMN_ONLY: 3422 if alias.args.get("columns"): 3423 self.raise_error("Unexpected extra column alias in unnest.") 3424 3425 alias.set("columns", [alias.this]) 3426 alias.set("this", None) 3427 3428 columns = alias.args.get("columns") or [] 3429 if offset and len(expressions) < len(columns): 3430 offset = columns.pop() 3431 3432 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3433 self._match(TokenType.ALIAS) 3434 offset = self._parse_id_var( 3435 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3436 ) or exp.to_identifier("offset") 3437 3438 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3439 3440 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3441 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3442 if not is_derived and not self._match_text_seq("VALUES"): 3443 return None 3444 3445 expressions = self._parse_csv(self._parse_value) 3446 alias = self._parse_table_alias() 3447 3448 if is_derived: 3449 self._match_r_paren() 3450 3451 return self.expression( 3452 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3453 ) 3454 3455 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3456 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3457 as_modifier and self._match_text_seq("USING", "SAMPLE") 3458 ): 3459 return None 3460 3461 bucket_numerator = None 3462 bucket_denominator = None 3463 bucket_field = None 3464 percent = None 3465 size = None 3466 seed = None 3467 3468 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3469 matched_l_paren = self._match(TokenType.L_PAREN) 3470 3471 if self.TABLESAMPLE_CSV: 3472 num = None 3473 expressions = self._parse_csv(self._parse_primary) 3474 else: 3475 expressions = None 3476 num = ( 3477 self._parse_factor() 3478 if self._match(TokenType.NUMBER, advance=False) 3479 else self._parse_primary() or self._parse_placeholder() 3480 ) 3481 3482 if self._match_text_seq("BUCKET"): 3483 bucket_numerator = self._parse_number() 3484 self._match_text_seq("OUT", "OF") 3485 bucket_denominator = self._parse_number() 3486 self._match(TokenType.ON) 3487 bucket_field = self._parse_field() 3488 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3489 percent = num 3490 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3491 size = num 3492 else: 3493 percent = num 3494 3495 if matched_l_paren: 3496 self._match_r_paren() 3497 3498 if self._match(TokenType.L_PAREN): 3499 method = self._parse_var(upper=True) 3500 seed = self._match(TokenType.COMMA) and self._parse_number() 3501 self._match_r_paren() 3502 elif self._match_texts(("SEED", "REPEATABLE")): 3503 seed = self._parse_wrapped(self._parse_number) 3504 3505 if not method and self.DEFAULT_SAMPLING_METHOD: 3506 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3507 3508 return self.expression( 3509 exp.TableSample, 3510 expressions=expressions, 3511 method=method, 3512
bucket_numerator=bucket_numerator, 3513 bucket_denominator=bucket_denominator, 3514 bucket_field=bucket_field, 3515 percent=percent, 3516 size=size, 3517 seed=seed, 3518 ) 3519 3520 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3521 return list(iter(self._parse_pivot, None)) or None 3522 3523 def _parse_joins(self) -> t.Iterator[exp.Join]: 3524 return iter(self._parse_join, None) 3525 3526 # https://duckdb.org/docs/sql/statements/pivot 3527 def _parse_simplified_pivot(self) -> exp.Pivot: 3528 def _parse_on() -> t.Optional[exp.Expression]: 3529 this = self._parse_bitwise() 3530 return self._parse_in(this) if self._match(TokenType.IN) else this 3531 3532 this = self._parse_table() 3533 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3534 using = self._match(TokenType.USING) and self._parse_csv( 3535 lambda: self._parse_alias(self._parse_function()) 3536 ) 3537 group = self._parse_group() 3538 return self.expression( 3539 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3540 ) 3541 3542 def _parse_pivot_in(self) -> exp.In: 3543 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3544 this = self._parse_conjunction() 3545 3546 self._match(TokenType.ALIAS) 3547 alias = self._parse_field() 3548 if alias: 3549 return self.expression(exp.PivotAlias, this=this, alias=alias) 3550 3551 return this 3552 3553 value = self._parse_column() 3554 3555 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3556 self.raise_error("Expecting IN (") 3557 3558 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3559 3560 self._match_r_paren() 3561 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3562 3563 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3564 index = self._index 3565 include_nulls = None 3566 3567 if self._match(TokenType.PIVOT): 3568 unpivot = False 3569 elif self._match(TokenType.UNPIVOT): 3570 unpivot = True 3571 3572 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3573 if self._match_text_seq("INCLUDE", "NULLS"): 3574 include_nulls = True 3575 elif self._match_text_seq("EXCLUDE", "NULLS"): 3576 include_nulls = False 3577 else: 3578 return None 3579 3580 expressions = [] 3581 3582 if not self._match(TokenType.L_PAREN): 3583 self._retreat(index) 3584 return None 3585 3586 if unpivot: 3587 expressions = self._parse_csv(self._parse_column) 3588 else: 3589 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3590 3591 if not expressions: 3592 self.raise_error("Failed to parse PIVOT's aggregation list") 3593 3594 if not self._match(TokenType.FOR): 3595 self.raise_error("Expecting FOR") 3596 3597 field = self._parse_pivot_in() 3598 3599 self._match_r_paren() 3600 3601 pivot = self.expression( 3602 exp.Pivot, 3603 expressions=expressions, 3604 field=field, 3605 unpivot=unpivot, 3606 include_nulls=include_nulls, 3607 ) 3608 3609 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3610 pivot.set("alias", self._parse_table_alias()) 3611 3612 if not unpivot: 3613 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3614 3615 columns: t.List[exp.Expression] = [] 3616 for fld in pivot.args["field"].expressions: 3617 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3618 for name in names: 3619 if self.PREFIXED_PIVOT_COLUMNS: 3620 name = f"{name}_{field_name}" if name else field_name 3621 else: 3622 name = f"{field_name}_{name}" if name else 
field_name 3623 3624 columns.append(exp.to_identifier(name)) 3625 3626 pivot.set("columns", columns) 3627 3628 return pivot 3629 3630 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3631 return [agg.alias for agg in aggregations] 3632 3633 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3634 if not skip_where_token and not self._match(TokenType.PREWHERE): 3635 return None 3636 3637 return self.expression( 3638 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3639 ) 3640 3641 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3642 if not skip_where_token and not self._match(TokenType.WHERE): 3643 return None 3644 3645 return self.expression( 3646 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3647 ) 3648 3649 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3650 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3651 return None 3652 3653 elements: t.Dict[str, t.Any] = defaultdict(list) 3654 3655 if self._match(TokenType.ALL): 3656 elements["all"] = True 3657 elif self._match(TokenType.DISTINCT): 3658 elements["all"] = False 3659 3660 while True: 3661 expressions = self._parse_csv( 3662 lambda: None 3663 if self._match(TokenType.ROLLUP, advance=False) 3664 else self._parse_conjunction() 3665 ) 3666 if expressions: 3667 elements["expressions"].extend(expressions) 3668 3669 grouping_sets = self._parse_grouping_sets() 3670 if grouping_sets: 3671 elements["grouping_sets"].extend(grouping_sets) 3672 3673 rollup = None 3674 cube = None 3675 totals = None 3676 3677 index = self._index 3678 with_ = self._match(TokenType.WITH) 3679 if self._match(TokenType.ROLLUP): 3680 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3681 elements["rollup"].extend(ensure_list(rollup)) 3682 3683 if self._match(TokenType.CUBE): 3684 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3685 elements["cube"].extend(ensure_list(cube)) 3686 3687 if self._match_text_seq("TOTALS"): 3688 totals = True 3689 elements["totals"] = True # type: ignore 3690 3691 if not (grouping_sets or rollup or cube or totals): 3692 if with_: 3693 self._retreat(index) 3694 break 3695 3696 return self.expression(exp.Group, **elements) # type: ignore 3697 3698 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3699 if not self._match(TokenType.GROUPING_SETS): 3700 return None 3701 3702 return self._parse_wrapped_csv(self._parse_grouping_set) 3703 3704 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3705 if self._match(TokenType.L_PAREN): 3706 grouping_set = self._parse_csv(self._parse_column) 3707 self._match_r_paren() 3708 return self.expression(exp.Tuple, expressions=grouping_set) 3709 3710 return self._parse_column() 3711 3712 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3713 if not skip_having_token and not self._match(TokenType.HAVING): 3714 return None 3715 return self.expression(exp.Having, this=self._parse_conjunction()) 3716 3717 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3718 if not self._match(TokenType.QUALIFY): 3719 return None 3720 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3721 3722 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3723 if skip_start_token: 3724 start = None 3725 elif self._match(TokenType.START_WITH): 3726 start = self._parse_conjunction() 3727 else: 
3728 return None 3729 3730 self._match(TokenType.CONNECT_BY) 3731 nocycle = self._match_text_seq("NOCYCLE") 3732 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3733 exp.Prior, this=self._parse_bitwise() 3734 ) 3735 connect = self._parse_conjunction() 3736 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3737 3738 if not start and self._match(TokenType.START_WITH): 3739 start = self._parse_conjunction() 3740 3741 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3742 3743 def _parse_name_as_expression(self) -> exp.Alias: 3744 return self.expression( 3745 exp.Alias, 3746 alias=self._parse_id_var(any_token=True), 3747 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3748 ) 3749 3750 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3751 if self._match_text_seq("INTERPOLATE"): 3752 return self._parse_wrapped_csv(self._parse_name_as_expression) 3753 return None 3754 3755 def _parse_order( 3756 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3757 ) -> t.Optional[exp.Expression]: 3758 siblings = None 3759 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3760 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3761 return this 3762 3763 siblings = True 3764 3765 return self.expression( 3766 exp.Order, 3767 this=this, 3768 expressions=self._parse_csv(self._parse_ordered), 3769 interpolate=self._parse_interpolate(), 3770 siblings=siblings, 3771 ) 3772 3773 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3774 if not self._match(token): 3775 return None 3776 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3777 3778 def _parse_ordered( 3779 self, parse_method: t.Optional[t.Callable] = None 3780 ) -> t.Optional[exp.Ordered]: 3781 this = parse_method() if parse_method else self._parse_conjunction() 3782 if not this: 3783 return None 3784 3785 asc = self._match(TokenType.ASC) 3786 desc = self._match(TokenType.DESC) or (asc and False) 3787 3788 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3789 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3790 3791 nulls_first = is_nulls_first or False 3792 explicitly_null_ordered = is_nulls_first or is_nulls_last 3793 3794 if ( 3795 not explicitly_null_ordered 3796 and ( 3797 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3798 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3799 ) 3800 and self.dialect.NULL_ORDERING != "nulls_are_last" 3801 ): 3802 nulls_first = True 3803 3804 if self._match_text_seq("WITH", "FILL"): 3805 with_fill = self.expression( 3806 exp.WithFill, 3807 **{ # type: ignore 3808 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3809 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3810 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3811 }, 3812 ) 3813 else: 3814 with_fill = None 3815 3816 return self.expression( 3817 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3818 ) 3819 3820 def _parse_limit( 3821 self, 3822 this: t.Optional[exp.Expression] = None, 3823 top: bool = False, 3824 skip_limit_token: bool = False, 3825 ) -> t.Optional[exp.Expression]: 3826 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3827 comments = self._prev_comments 3828 if top: 3829 limit_paren = self._match(TokenType.L_PAREN) 3830 expression = self._parse_term() if limit_paren else self._parse_number() 3831 3832 if limit_paren: 3833 
self._match_r_paren() 3834 else: 3835 expression = self._parse_term() 3836 3837 if self._match(TokenType.COMMA): 3838 offset = expression 3839 expression = self._parse_term() 3840 else: 3841 offset = None 3842 3843 limit_exp = self.expression( 3844 exp.Limit, 3845 this=this, 3846 expression=expression, 3847 offset=offset, 3848 comments=comments, 3849 expressions=self._parse_limit_by(), 3850 ) 3851 3852 return limit_exp 3853 3854 if self._match(TokenType.FETCH): 3855 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3856 direction = self._prev.text.upper() if direction else "FIRST" 3857 3858 count = self._parse_field(tokens=self.FETCH_TOKENS) 3859 percent = self._match(TokenType.PERCENT) 3860 3861 self._match_set((TokenType.ROW, TokenType.ROWS)) 3862 3863 only = self._match_text_seq("ONLY") 3864 with_ties = self._match_text_seq("WITH", "TIES") 3865 3866 if only and with_ties: 3867 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3868 3869 return self.expression( 3870 exp.Fetch, 3871 direction=direction, 3872 count=count, 3873 percent=percent, 3874 with_ties=with_ties, 3875 ) 3876 3877 return this 3878 3879 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3880 if not self._match(TokenType.OFFSET): 3881 return this 3882 3883 count = self._parse_term() 3884 self._match_set((TokenType.ROW, TokenType.ROWS)) 3885 3886 return self.expression( 3887 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3888 ) 3889 3890 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3891 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3892 3893 def _parse_locks(self) -> t.List[exp.Lock]: 3894 locks = [] 3895 while True: 3896 if self._match_text_seq("FOR", "UPDATE"): 3897 update = True 3898 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3899 "LOCK", "IN", "SHARE", "MODE" 3900 ): 3901 update = False 3902 else: 3903 break 3904 3905 expressions = None 3906 if self._match_text_seq("OF"): 3907 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3908 3909 wait: t.Optional[bool | exp.Expression] = None 3910 if self._match_text_seq("NOWAIT"): 3911 wait = True 3912 elif self._match_text_seq("WAIT"): 3913 wait = self._parse_primary() 3914 elif self._match_text_seq("SKIP", "LOCKED"): 3915 wait = False 3916 3917 locks.append( 3918 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3919 ) 3920 3921 return locks 3922 3923 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3924 while this and self._match_set(self.SET_OPERATIONS): 3925 token_type = self._prev.token_type 3926 3927 if token_type == TokenType.UNION: 3928 operation = exp.Union 3929 elif token_type == TokenType.EXCEPT: 3930 operation = exp.Except 3931 else: 3932 operation = exp.Intersect 3933 3934 comments = self._prev.comments 3935 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3936 by_name = self._match_text_seq("BY", "NAME") 3937 expression = self._parse_select(nested=True, parse_set_operation=False) 3938 3939 this = self.expression( 3940 operation, 3941 comments=comments, 3942 this=this, 3943 distinct=distinct, 3944 by_name=by_name, 3945 expression=expression, 3946 ) 3947 3948 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3949 expression = this.expression 3950 3951 if expression: 3952 for arg in self.UNION_MODIFIERS: 3953 expr = expression.args.get(arg) 3954 
if expr: 3955 this.set(arg, expr.pop()) 3956 3957 return this 3958 3959 def _parse_expression(self) -> t.Optional[exp.Expression]: 3960 return self._parse_alias(self._parse_conjunction()) 3961 3962 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3963 this = self._parse_equality() 3964 3965 if self._match(TokenType.COLON_EQ): 3966 this = self.expression( 3967 exp.PropertyEQ, 3968 this=this, 3969 comments=self._prev_comments, 3970 expression=self._parse_conjunction(), 3971 ) 3972 3973 while self._match_set(self.CONJUNCTION): 3974 this = self.expression( 3975 self.CONJUNCTION[self._prev.token_type], 3976 this=this, 3977 comments=self._prev_comments, 3978 expression=self._parse_equality(), 3979 ) 3980 return this 3981 3982 def _parse_equality(self) -> t.Optional[exp.Expression]: 3983 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3984 3985 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3986 return self._parse_tokens(self._parse_range, self.COMPARISON) 3987 3988 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3989 this = this or self._parse_bitwise() 3990 negate = self._match(TokenType.NOT) 3991 3992 if self._match_set(self.RANGE_PARSERS): 3993 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3994 if not expression: 3995 return this 3996 3997 this = expression 3998 elif self._match(TokenType.ISNULL): 3999 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4000 4001 # Postgres supports ISNULL and NOTNULL for conditions. 4002 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4003 if self._match(TokenType.NOTNULL): 4004 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4005 this = self.expression(exp.Not, this=this) 4006 4007 if negate: 4008 this = self.expression(exp.Not, this=this) 4009 4010 if self._match(TokenType.IS): 4011 this = self._parse_is(this) 4012 4013 return this 4014 4015 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4016 index = self._index - 1 4017 negate = self._match(TokenType.NOT) 4018 4019 if self._match_text_seq("DISTINCT", "FROM"): 4020 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4021 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4022 4023 expression = self._parse_null() or self._parse_boolean() 4024 if not expression: 4025 self._retreat(index) 4026 return None 4027 4028 this = self.expression(exp.Is, this=this, expression=expression) 4029 return self.expression(exp.Not, this=this) if negate else this 4030 4031 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4032 unnest = self._parse_unnest(with_alias=False) 4033 if unnest: 4034 this = self.expression(exp.In, this=this, unnest=unnest) 4035 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4036 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4037 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4038 4039 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4040 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4041 else: 4042 this = self.expression(exp.In, this=this, expressions=expressions) 4043 4044 if matched_l_paren: 4045 self._match_r_paren(this) 4046 elif not self._match(TokenType.R_BRACKET, expression=this): 4047 self.raise_error("Expecting ]") 4048 else: 4049 this = self.expression(exp.In, this=this, field=self._parse_field()) 4050 4051 return this 
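# A small sketch (using the public API, not part of the parser) of the In
# shapes produced above:
#
#     from sqlglot import exp, parse_one
#     parse_one("SELECT * FROM t WHERE x IN (1, 2)").find(exp.In).expressions
#     # -> the literal value list; a subquery lands in args["query"] instead:
#     parse_one("SELECT * FROM t WHERE x IN (SELECT y FROM u)").find(exp.In).args["query"]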
4052 4053 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4054 low = self._parse_bitwise() 4055 self._match(TokenType.AND) 4056 high = self._parse_bitwise() 4057 return self.expression(exp.Between, this=this, low=low, high=high) 4058 4059 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4060 if not self._match(TokenType.ESCAPE): 4061 return this 4062 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4063 4064 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4065 index = self._index 4066 4067 if not self._match(TokenType.INTERVAL) and match_interval: 4068 return None 4069 4070 if self._match(TokenType.STRING, advance=False): 4071 this = self._parse_primary() 4072 else: 4073 this = self._parse_term() 4074 4075 if not this or ( 4076 isinstance(this, exp.Column) 4077 and not this.table 4078 and not this.this.quoted 4079 and this.name.upper() == "IS" 4080 ): 4081 self._retreat(index) 4082 return None 4083 4084 unit = self._parse_function() or ( 4085 not self._match(TokenType.ALIAS, advance=False) 4086 and self._parse_var(any_token=True, upper=True) 4087 ) 4088 4089 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4090 # each INTERVAL expression into this canonical form so it's easy to transpile 4091 if this and this.is_number: 4092 this = exp.Literal.string(this.name) 4093 elif this and this.is_string: 4094 parts = this.name.split() 4095 4096 if len(parts) == 2: 4097 if unit: 4098 # This is not actually a unit, it's something else (e.g. a "window side") 4099 unit = None 4100 self._retreat(self._index - 1) 4101 4102 this = exp.Literal.string(parts[0]) 4103 unit = self.expression(exp.Var, this=parts[1].upper()) 4104 4105 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4106 unit = self.expression( 4107 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4108 ) 4109 4110 interval = self.expression(exp.Interval, this=this, unit=unit) 4111 4112 index = self._index 4113 self._match(TokenType.PLUS) 4114 4115 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4116 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4117 return self.expression( 4118 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4119 ) 4120 4121 self._retreat(index) 4122 return interval 4123 4124 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4125 this = self._parse_term() 4126 4127 while True: 4128 if self._match_set(self.BITWISE): 4129 this = self.expression( 4130 self.BITWISE[self._prev.token_type], 4131 this=this, 4132 expression=self._parse_term(), 4133 ) 4134 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4135 this = self.expression( 4136 exp.DPipe, 4137 this=this, 4138 expression=self._parse_term(), 4139 safe=not self.dialect.STRICT_STRING_CONCAT, 4140 ) 4141 elif self._match(TokenType.DQMARK): 4142 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4143 elif self._match_pair(TokenType.LT, TokenType.LT): 4144 this = self.expression( 4145 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4146 ) 4147 elif self._match_pair(TokenType.GT, TokenType.GT): 4148 this = self.expression( 4149 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4150 ) 4151 else: 4152 break 4153 4154 return this 4155 4156 def _parse_term(self) -> t.Optional[exp.Expression]: 4157 return self._parse_tokens(self._parse_factor, self.TERM) 4158 4159 def _parse_factor(self) -> t.Optional[exp.Expression]: 4160 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4161 this = parse_method() 4162 4163 while self._match_set(self.FACTOR): 4164 this = self.expression( 4165 self.FACTOR[self._prev.token_type], 4166 this=this, 4167 comments=self._prev_comments, 4168 expression=parse_method(), 4169 ) 4170 if isinstance(this, exp.Div): 4171 this.args["typed"] = self.dialect.TYPED_DIVISION 4172 this.args["safe"] = self.dialect.SAFE_DIVISION 4173 4174 return this 4175 4176 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4177 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4178 4179 def _parse_unary(self) -> t.Optional[exp.Expression]: 4180 if self._match_set(self.UNARY_PARSERS): 4181 return self.UNARY_PARSERS[self._prev.token_type](self) 4182 return self._parse_at_time_zone(self._parse_type()) 4183 4184 def _parse_type( 4185 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4186 ) -> t.Optional[exp.Expression]: 4187 interval = parse_interval and self._parse_interval() 4188 if interval: 4189 return interval 4190 4191 index = self._index 4192 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4193 4194 if data_type: 4195 index2 = self._index 4196 this = self._parse_primary() 4197 4198 if isinstance(this, exp.Literal): 4199 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4200 if parser: 4201 return parser(self, this, data_type) 4202 4203 return self.expression(exp.Cast, this=this, to=data_type) 4204 4205 if data_type.expressions: 4206 self._retreat(index2) 4207 return self._parse_column_ops(data_type) 4208 4209 self._retreat(index) 4210 4211 if fallback_to_identifier: 4212 return self._parse_id_var() 4213 4214 this = self._parse_column() 4215 return this and self._parse_column_ops(this) 4216 4217 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4218 this = self._parse_type() 4219 if not this: 4220 return None 4221 4222 if isinstance(this, exp.Column) and not this.table: 4223 this = exp.var(this.name.upper()) 4224 4225 return 
self.expression( 4226 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4227 ) 4228 4229 def _parse_types( 4230 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4231 ) -> t.Optional[exp.Expression]: 4232 index = self._index 4233 4234 this: t.Optional[exp.Expression] = None 4235 prefix = self._match_text_seq("SYSUDTLIB", ".") 4236 4237 if not self._match_set(self.TYPE_TOKENS): 4238 identifier = allow_identifiers and self._parse_id_var( 4239 any_token=False, tokens=(TokenType.VAR,) 4240 ) 4241 if identifier: 4242 tokens = self.dialect.tokenize(identifier.name) 4243 4244 if len(tokens) != 1: 4245 self.raise_error("Unexpected identifier", self._prev) 4246 4247 if tokens[0].token_type in self.TYPE_TOKENS: 4248 self._prev = tokens[0] 4249 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4250 type_name = identifier.name 4251 4252 while self._match(TokenType.DOT): 4253 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4254 4255 this = exp.DataType.build(type_name, udt=True) 4256 else: 4257 self._retreat(self._index - 1) 4258 return None 4259 else: 4260 return None 4261 4262 type_token = self._prev.token_type 4263 4264 if type_token == TokenType.PSEUDO_TYPE: 4265 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4266 4267 if type_token == TokenType.OBJECT_IDENTIFIER: 4268 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4269 4270 nested = type_token in self.NESTED_TYPE_TOKENS 4271 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4272 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4273 expressions = None 4274 maybe_func = False 4275 4276 if self._match(TokenType.L_PAREN): 4277 if is_struct: 4278 expressions = self._parse_csv(self._parse_struct_types) 4279 elif nested: 4280 expressions = self._parse_csv( 4281 lambda: self._parse_types( 4282 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4283 ) 4284 ) 4285 elif type_token in self.ENUM_TYPE_TOKENS: 4286 expressions = self._parse_csv(self._parse_equality) 4287 elif is_aggregate: 4288 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4289 any_token=False, tokens=(TokenType.VAR,) 4290 ) 4291 if not func_or_ident or not self._match(TokenType.COMMA): 4292 return None 4293 expressions = self._parse_csv( 4294 lambda: self._parse_types( 4295 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4296 ) 4297 ) 4298 expressions.insert(0, func_or_ident) 4299 else: 4300 expressions = self._parse_csv(self._parse_type_size) 4301 4302 if not expressions or not self._match(TokenType.R_PAREN): 4303 self._retreat(index) 4304 return None 4305 4306 maybe_func = True 4307 4308 values: t.Optional[t.List[exp.Expression]] = None 4309 4310 if nested and self._match(TokenType.LT): 4311 if is_struct: 4312 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4313 else: 4314 expressions = self._parse_csv( 4315 lambda: self._parse_types( 4316 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4317 ) 4318 ) 4319 4320 if not self._match(TokenType.GT): 4321 self.raise_error("Expecting >") 4322 4323 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4324 values = self._parse_csv(self._parse_conjunction) 4325 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4326 4327 if type_token in self.TIMESTAMPS: 4328 if self._match_text_seq("WITH", "TIME", "ZONE"): 4329 maybe_func = False 4330 tz_type = ( 4331 
exp.DataType.Type.TIMETZ 4332 if type_token in self.TIMES 4333 else exp.DataType.Type.TIMESTAMPTZ 4334 ) 4335 this = exp.DataType(this=tz_type, expressions=expressions) 4336 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4337 maybe_func = False 4338 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4339 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4340 maybe_func = False 4341 elif type_token == TokenType.INTERVAL: 4342 unit = self._parse_var(upper=True) 4343 if unit: 4344 if self._match_text_seq("TO"): 4345 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4346 4347 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4348 else: 4349 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4350 4351 if maybe_func and check_func: 4352 index2 = self._index 4353 peek = self._parse_string() 4354 4355 if not peek: 4356 self._retreat(index) 4357 return None 4358 4359 self._retreat(index2) 4360 4361 if not this: 4362 if self._match_text_seq("UNSIGNED"): 4363 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4364 if not unsigned_type_token: 4365 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4366 4367 type_token = unsigned_type_token or type_token 4368 4369 this = exp.DataType( 4370 this=exp.DataType.Type[type_token.value], 4371 expressions=expressions, 4372 nested=nested, 4373 values=values, 4374 prefix=prefix, 4375 ) 4376 elif expressions: 4377 this.set("expressions", expressions) 4378 4379 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4380 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4381 4382 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4383 converter = self.TYPE_CONVERTER.get(this.this) 4384 if converter: 4385 this = converter(t.cast(exp.DataType, this)) 4386 4387 return this 4388 4389 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4390 index = self._index 4391 this = ( 4392 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4393 or self._parse_id_var() 4394 ) 4395 self._match(TokenType.COLON) 4396 column_def = self._parse_column_def(this) 4397 4398 if type_required and ( 4399 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4400 ): 4401 self._retreat(index) 4402 return self._parse_types() 4403 4404 return column_def 4405 4406 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4407 if not self._match_text_seq("AT", "TIME", "ZONE"): 4408 return this 4409 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4410 4411 def _parse_column(self) -> t.Optional[exp.Expression]: 4412 this = self._parse_column_reference() 4413 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4414 4415 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4416 this = self._parse_field() 4417 if ( 4418 not this 4419 and self._match(TokenType.VALUES, advance=False) 4420 and self.VALUES_FOLLOWED_BY_PAREN 4421 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4422 ): 4423 this = self._parse_id_var() 4424 4425 if isinstance(this, exp.Identifier): 4426 # We bubble up comments from the Identifier to the Column 4427 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4428 4429 return this 4430 4431 def _parse_colon_as_json_extract( 4432 self, this: 
t.Optional[exp.Expression] 4433 ) -> t.Optional[exp.Expression]: 4434 casts = [] 4435 json_path = [] 4436 4437 while self._match(TokenType.COLON): 4438 start_index = self._index 4439 path = self._parse_column_ops(self._parse_field(any_token=True)) 4440 4441 # The cast :: operator has a lower precedence than the extraction operator :, so 4442 # we rearrange the AST appropriately to avoid casting the JSON path 4443 while isinstance(path, exp.Cast): 4444 casts.append(path.to) 4445 path = path.this 4446 4447 if casts: 4448 dcolon_offset = next( 4449 i 4450 for i, t in enumerate(self._tokens[start_index:]) 4451 if t.token_type == TokenType.DCOLON 4452 ) 4453 end_token = self._tokens[start_index + dcolon_offset - 1] 4454 else: 4455 end_token = self._prev 4456 4457 if path: 4458 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4459 4460 if json_path: 4461 this = self.expression( 4462 exp.JSONExtract, 4463 this=this, 4464 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4465 ) 4466 4467 while casts: 4468 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4469 4470 return this 4471 4472 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4473 this = self._parse_bracket(this) 4474 4475 while self._match_set(self.COLUMN_OPERATORS): 4476 op_token = self._prev.token_type 4477 op = self.COLUMN_OPERATORS.get(op_token) 4478 4479 if op_token == TokenType.DCOLON: 4480 field = self._parse_types() 4481 if not field: 4482 self.raise_error("Expected type") 4483 elif op and self._curr: 4484 field = self._parse_column_reference() 4485 else: 4486 field = self._parse_field(any_token=True, anonymous_func=True) 4487 4488 if isinstance(field, exp.Func) and this: 4489 # bigquery allows function calls like x.y.count(...) 4490 # SAFE.SUBSTR(...) 
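                # e.g. a qualifier like x.y that was parsed as a Column is folded
                # back into a Dot chain below, so the trailing call keeps its prefix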
4491 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4492 this = exp.replace_tree( 4493 this, 4494 lambda n: ( 4495 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4496 if n.table 4497 else n.this 4498 ) 4499 if isinstance(n, exp.Column) 4500 else n, 4501 ) 4502 4503 if op: 4504 this = op(self, this, field) 4505 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4506 this = self.expression( 4507 exp.Column, 4508 this=field, 4509 table=this.this, 4510 db=this.args.get("table"), 4511 catalog=this.args.get("db"), 4512 ) 4513 else: 4514 this = self.expression(exp.Dot, this=this, expression=field) 4515 4516 this = self._parse_bracket(this) 4517 4518 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4519 4520 def _parse_primary(self) -> t.Optional[exp.Expression]: 4521 if self._match_set(self.PRIMARY_PARSERS): 4522 token_type = self._prev.token_type 4523 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4524 4525 if token_type == TokenType.STRING: 4526 expressions = [primary] 4527 while self._match(TokenType.STRING): 4528 expressions.append(exp.Literal.string(self._prev.text)) 4529 4530 if len(expressions) > 1: 4531 return self.expression(exp.Concat, expressions=expressions) 4532 4533 return primary 4534 4535 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4536 return exp.Literal.number(f"0.{self._prev.text}") 4537 4538 if self._match(TokenType.L_PAREN): 4539 comments = self._prev_comments 4540 query = self._parse_select() 4541 4542 if query: 4543 expressions = [query] 4544 else: 4545 expressions = self._parse_expressions() 4546 4547 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4548 4549 if not this and self._match(TokenType.R_PAREN, advance=False): 4550 this = self.expression(exp.Tuple) 4551 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4552 this = self._parse_subquery(this=this, parse_alias=False) 4553 elif isinstance(this, exp.Subquery): 4554 this = self._parse_subquery( 4555 this=self._parse_set_operations(this), parse_alias=False 4556 ) 4557 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4558 this = self.expression(exp.Tuple, expressions=expressions) 4559 else: 4560 this = self.expression(exp.Paren, this=this) 4561 4562 if this: 4563 this.add_comments(comments) 4564 4565 self._match_r_paren(expression=this) 4566 return this 4567 4568 return None 4569 4570 def _parse_field( 4571 self, 4572 any_token: bool = False, 4573 tokens: t.Optional[t.Collection[TokenType]] = None, 4574 anonymous_func: bool = False, 4575 ) -> t.Optional[exp.Expression]: 4576 if anonymous_func: 4577 field = ( 4578 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4579 or self._parse_primary() 4580 ) 4581 else: 4582 field = self._parse_primary() or self._parse_function( 4583 anonymous=anonymous_func, any_token=any_token 4584 ) 4585 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4586 4587 def _parse_function( 4588 self, 4589 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4590 anonymous: bool = False, 4591 optional_parens: bool = True, 4592 any_token: bool = False, 4593 ) -> t.Optional[exp.Expression]: 4594 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4595 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4596 fn_syntax = False 4597 if ( 4598 self._match(TokenType.L_BRACE, advance=False) 4599 and self._next 4600 and 
self._next.text.upper() == "FN" 4601 ): 4602 self._advance(2) 4603 fn_syntax = True 4604 4605 func = self._parse_function_call( 4606 functions=functions, 4607 anonymous=anonymous, 4608 optional_parens=optional_parens, 4609 any_token=any_token, 4610 ) 4611 4612 if fn_syntax: 4613 self._match(TokenType.R_BRACE) 4614 4615 return func 4616 4617 def _parse_function_call( 4618 self, 4619 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4620 anonymous: bool = False, 4621 optional_parens: bool = True, 4622 any_token: bool = False, 4623 ) -> t.Optional[exp.Expression]: 4624 if not self._curr: 4625 return None 4626 4627 comments = self._curr.comments 4628 token_type = self._curr.token_type 4629 this = self._curr.text 4630 upper = this.upper() 4631 4632 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4633 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4634 self._advance() 4635 return self._parse_window(parser(self)) 4636 4637 if not self._next or self._next.token_type != TokenType.L_PAREN: 4638 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4639 self._advance() 4640 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4641 4642 return None 4643 4644 if any_token: 4645 if token_type in self.RESERVED_TOKENS: 4646 return None 4647 elif token_type not in self.FUNC_TOKENS: 4648 return None 4649 4650 self._advance(2) 4651 4652 parser = self.FUNCTION_PARSERS.get(upper) 4653 if parser and not anonymous: 4654 this = parser(self) 4655 else: 4656 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4657 4658 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4659 this = self.expression(subquery_predicate, this=self._parse_select()) 4660 self._match_r_paren() 4661 return this 4662 4663 if functions is None: 4664 functions = self.FUNCTIONS 4665 4666 function = functions.get(upper) 4667 4668 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4669 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4670 4671 if alias: 4672 args = self._kv_to_prop_eq(args) 4673 4674 if function and not anonymous: 4675 if "dialect" in function.__code__.co_varnames: 4676 func = function(args, dialect=self.dialect) 4677 else: 4678 func = function(args) 4679 4680 func = self.validate_expression(func, args) 4681 if not self.dialect.NORMALIZE_FUNCTIONS: 4682 func.meta["name"] = this 4683 4684 this = func 4685 else: 4686 if token_type == TokenType.IDENTIFIER: 4687 this = exp.Identifier(this=this, quoted=True) 4688 this = self.expression(exp.Anonymous, this=this, expressions=args) 4689 4690 if isinstance(this, exp.Expression): 4691 this.add_comments(comments) 4692 4693 self._match_r_paren(this) 4694 return self._parse_window(this) 4695 4696 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4697 transformed = [] 4698 4699 for e in expressions: 4700 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4701 if isinstance(e, exp.Alias): 4702 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4703 4704 if not isinstance(e, exp.PropertyEQ): 4705 e = self.expression( 4706 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4707 ) 4708 4709 if isinstance(e.this, exp.Column): 4710 e.this.replace(e.this.this) 4711 4712 transformed.append(e) 4713 4714 return transformed 4715 4716 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4717 return self._parse_column_def(self._parse_id_var()) 4718 4719 def _parse_user_defined_function( 
4720 self, kind: t.Optional[TokenType] = None 4721 ) -> t.Optional[exp.Expression]: 4722 this = self._parse_id_var() 4723 4724 while self._match(TokenType.DOT): 4725 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4726 4727 if not self._match(TokenType.L_PAREN): 4728 return this 4729 4730 expressions = self._parse_csv(self._parse_function_parameter) 4731 self._match_r_paren() 4732 return self.expression( 4733 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4734 ) 4735 4736 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4737 literal = self._parse_primary() 4738 if literal: 4739 return self.expression(exp.Introducer, this=token.text, expression=literal) 4740 4741 return self.expression(exp.Identifier, this=token.text) 4742 4743 def _parse_session_parameter(self) -> exp.SessionParameter: 4744 kind = None 4745 this = self._parse_id_var() or self._parse_primary() 4746 4747 if this and self._match(TokenType.DOT): 4748 kind = this.name 4749 this = self._parse_var() or self._parse_primary() 4750 4751 return self.expression(exp.SessionParameter, this=this, kind=kind) 4752 4753 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4754 return self._parse_id_var() 4755 4756 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4757 index = self._index 4758 4759 if self._match(TokenType.L_PAREN): 4760 expressions = t.cast( 4761 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4762 ) 4763 4764 if not self._match(TokenType.R_PAREN): 4765 self._retreat(index) 4766 else: 4767 expressions = [self._parse_lambda_arg()] 4768 4769 if self._match_set(self.LAMBDAS): 4770 return self.LAMBDAS[self._prev.token_type](self, expressions) 4771 4772 self._retreat(index) 4773 4774 this: t.Optional[exp.Expression] 4775 4776 if self._match(TokenType.DISTINCT): 4777 this = self.expression( 4778 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4779 ) 4780 else: 4781 this = self._parse_select_or_expression(alias=alias) 4782 4783 return self._parse_limit( 4784 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4785 ) 4786 4787 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4788 index = self._index 4789 if not self._match(TokenType.L_PAREN): 4790 return this 4791 4792 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4793 # expr can be of both types 4794 if self._match_set(self.SELECT_START_TOKENS): 4795 self._retreat(index) 4796 return this 4797 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4798 self._match_r_paren() 4799 return self.expression(exp.Schema, this=this, expressions=args) 4800 4801 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4802 return self._parse_column_def(self._parse_field(any_token=True)) 4803 4804 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4805 # column defs are not really columns, they're identifiers 4806 if isinstance(this, exp.Column): 4807 this = this.this 4808 4809 kind = self._parse_types(schema=True) 4810 4811 if self._match_text_seq("FOR", "ORDINALITY"): 4812 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4813 4814 constraints: t.List[exp.Expression] = [] 4815 4816 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4817 ("ALIAS", "MATERIALIZED") 4818 ): 4819 persisted = self._prev.text.upper() == "MATERIALIZED" 4820 constraints.append( 4821 self.expression( 4822 exp.ComputedColumnConstraint, 4823 this=self._parse_conjunction(), 4824 persisted=persisted or self._match_text_seq("PERSISTED"), 4825 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4826 ) 4827 ) 4828 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4829 self._match(TokenType.ALIAS) 4830 constraints.append( 4831 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4832 ) 4833 4834 while True: 4835 constraint = self._parse_column_constraint() 4836 if not constraint: 4837 break 4838 constraints.append(constraint) 4839 4840 if not kind and not constraints: 4841 return this 4842 4843 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4844 4845 def _parse_auto_increment( 4846 self, 4847 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4848 start = None 4849 increment = None 4850 4851 if self._match(TokenType.L_PAREN, advance=False): 4852 args = self._parse_wrapped_csv(self._parse_bitwise) 4853 start = seq_get(args, 0) 4854 increment = seq_get(args, 1) 4855 elif self._match_text_seq("START"): 4856 start = self._parse_bitwise() 4857 self._match_text_seq("INCREMENT") 4858 increment = self._parse_bitwise() 4859 4860 if start and increment: 4861 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4862 4863 return exp.AutoIncrementColumnConstraint() 4864 4865 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4866 if not self._match_text_seq("REFRESH"): 4867 self._retreat(self._index - 1) 4868 return None 4869 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4870 4871 def _parse_compress(self) -> exp.CompressColumnConstraint: 4872 if self._match(TokenType.L_PAREN, advance=False): 4873 return self.expression( 4874 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4875 ) 4876 4877 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4878 4879 def _parse_generated_as_identity( 4880 self, 4881 ) -> ( 4882 exp.GeneratedAsIdentityColumnConstraint 4883 | exp.ComputedColumnConstraint 4884 | exp.GeneratedAsRowColumnConstraint 4885 ): 4886 if self._match_text_seq("BY", "DEFAULT"): 4887 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4888 this = self.expression( 4889 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4890 ) 4891 else: 4892 self._match_text_seq("ALWAYS") 4893 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4894 4895 self._match(TokenType.ALIAS) 4896 4897 if self._match_text_seq("ROW"): 4898 start = self._match_text_seq("START") 4899 if not start: 4900 self._match(TokenType.END) 4901 hidden = self._match_text_seq("HIDDEN") 4902 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4903 4904 identity = self._match_text_seq("IDENTITY") 4905 4906 if self._match(TokenType.L_PAREN): 4907 if self._match(TokenType.START_WITH): 4908 this.set("start", self._parse_bitwise()) 4909 if self._match_text_seq("INCREMENT", "BY"): 4910 this.set("increment", self._parse_bitwise()) 4911 if self._match_text_seq("MINVALUE"): 4912 this.set("minvalue", self._parse_bitwise()) 4913 if self._match_text_seq("MAXVALUE"): 4914 this.set("maxvalue", self._parse_bitwise()) 4915 4916 if self._match_text_seq("CYCLE"): 4917 this.set("cycle", True) 4918 elif self._match_text_seq("NO", "CYCLE"): 4919 this.set("cycle", False) 4920 4921 if not identity: 4922 this.set("expression", self._parse_range()) 4923 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4924 args = self._parse_csv(self._parse_bitwise) 4925 this.set("start", seq_get(args, 0)) 4926 this.set("increment", seq_get(args, 1)) 4927 4928 self._match_r_paren() 4929 4930 return this 4931 4932 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4933 self._match_text_seq("LENGTH") 4934 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4935 4936 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4937 if self._match_text_seq("NULL"): 4938 return self.expression(exp.NotNullColumnConstraint) 4939 if self._match_text_seq("CASESPECIFIC"): 4940 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4941 if self._match_text_seq("FOR", "REPLICATION"): 4942 return self.expression(exp.NotForReplicationColumnConstraint) 4943 return None 4944 4945 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4946 if self._match(TokenType.CONSTRAINT): 4947 this = self._parse_id_var() 4948 else: 4949 this = None 4950 4951 if self._match_texts(self.CONSTRAINT_PARSERS): 4952 return self.expression( 4953 exp.ColumnConstraint, 4954 this=this, 4955 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4956 ) 4957 4958 return this 4959 4960 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4961 if not self._match(TokenType.CONSTRAINT): 4962 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4963 4964 return self.expression( 4965 exp.Constraint, 4966 this=self._parse_id_var(), 4967 expressions=self._parse_unnamed_constraints(), 4968 ) 4969 4970 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4971 constraints = [] 4972 while True: 4973 constraint = self._parse_unnamed_constraint() or self._parse_function() 4974 if not constraint: 4975 break 4976 constraints.append(constraint) 4977 4978 return constraints 4979 4980 def _parse_unnamed_constraint( 4981 self, constraints: t.Optional[t.Collection[str]] = None 4982 ) -> t.Optional[exp.Expression]: 4983 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4984 constraints or self.CONSTRAINT_PARSERS 4985 ): 4986 return None 4987 4988 constraint = self._prev.text.upper() 4989 if constraint not in self.CONSTRAINT_PARSERS: 4990 
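            # A caller-supplied `constraints` collection may contain keywords that
            # have no registered parser; surface that explicitly instead of mis-parsing.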
self.raise_error(f"No parser found for schema constraint {constraint}.") 4991 4992 return self.CONSTRAINT_PARSERS[constraint](self) 4993 4994 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4995 self._match_text_seq("KEY") 4996 return self.expression( 4997 exp.UniqueColumnConstraint, 4998 this=self._parse_schema(self._parse_id_var(any_token=False)), 4999 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5000 on_conflict=self._parse_on_conflict(), 5001 ) 5002 5003 def _parse_key_constraint_options(self) -> t.List[str]: 5004 options = [] 5005 while True: 5006 if not self._curr: 5007 break 5008 5009 if self._match(TokenType.ON): 5010 action = None 5011 on = self._advance_any() and self._prev.text 5012 5013 if self._match_text_seq("NO", "ACTION"): 5014 action = "NO ACTION" 5015 elif self._match_text_seq("CASCADE"): 5016 action = "CASCADE" 5017 elif self._match_text_seq("RESTRICT"): 5018 action = "RESTRICT" 5019 elif self._match_pair(TokenType.SET, TokenType.NULL): 5020 action = "SET NULL" 5021 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5022 action = "SET DEFAULT" 5023 else: 5024 self.raise_error("Invalid key constraint") 5025 5026 options.append(f"ON {on} {action}") 5027 elif self._match_text_seq("NOT", "ENFORCED"): 5028 options.append("NOT ENFORCED") 5029 elif self._match_text_seq("DEFERRABLE"): 5030 options.append("DEFERRABLE") 5031 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5032 options.append("INITIALLY DEFERRED") 5033 elif self._match_text_seq("NORELY"): 5034 options.append("NORELY") 5035 elif self._match_text_seq("MATCH", "FULL"): 5036 options.append("MATCH FULL") 5037 else: 5038 break 5039 5040 return options 5041 5042 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5043 if match and not self._match(TokenType.REFERENCES): 5044 return None 5045 5046 expressions = None 5047 this = self._parse_table(schema=True) 5048 options = self._parse_key_constraint_options() 5049 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5050 5051 def _parse_foreign_key(self) -> exp.ForeignKey: 5052 expressions = self._parse_wrapped_id_vars() 5053 reference = self._parse_references() 5054 options = {} 5055 5056 while self._match(TokenType.ON): 5057 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5058 self.raise_error("Expected DELETE or UPDATE") 5059 5060 kind = self._prev.text.lower() 5061 5062 if self._match_text_seq("NO", "ACTION"): 5063 action = "NO ACTION" 5064 elif self._match(TokenType.SET): 5065 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5066 action = "SET " + self._prev.text.upper() 5067 else: 5068 self._advance() 5069 action = self._prev.text.upper() 5070 5071 options[kind] = action 5072 5073 return self.expression( 5074 exp.ForeignKey, 5075 expressions=expressions, 5076 reference=reference, 5077 **options, # type: ignore 5078 ) 5079 5080 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5081 return self._parse_field() 5082 5083 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5084 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5085 self._retreat(self._index - 1) 5086 return None 5087 5088 id_vars = self._parse_wrapped_id_vars() 5089 return self.expression( 5090 exp.PeriodForSystemTimeConstraint, 5091 this=seq_get(id_vars, 0), 5092 expression=seq_get(id_vars, 1), 5093 ) 5094 5095 def _parse_primary_key( 5096 self, wrapped_optional: bool = False, in_props: bool = False 5097 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5098 desc = ( 5099 self._match_set((TokenType.ASC, TokenType.DESC)) 5100 and self._prev.token_type == TokenType.DESC 5101 ) 5102 5103 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5104 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5105 5106 expressions = self._parse_wrapped_csv( 5107 self._parse_primary_key_part, optional=wrapped_optional 5108 ) 5109 options = self._parse_key_constraint_options() 5110 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5111 5112 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5113 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5114 5115 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5116 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5117 return this 5118 5119 bracket_kind = self._prev.token_type 5120 expressions = self._parse_csv( 5121 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5122 ) 5123 5124 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5125 self.raise_error("Expected ]") 5126 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5127 self.raise_error("Expected }") 5128 5129 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5130 if bracket_kind == TokenType.L_BRACE: 5131 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5132 elif not this or this.name.upper() == "ARRAY": 5133 this = self.expression(exp.Array, expressions=expressions) 5134 else: 5135 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5136 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5137 5138 self._add_comments(this) 5139 return self._parse_bracket(this) 5140 5141 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5142 if self._match(TokenType.COLON): 5143 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5144 return this 5145 5146 def _parse_case(self) -> t.Optional[exp.Expression]: 5147 ifs = [] 5148 default = None 5149 5150 comments = self._prev_comments 5151 expression = self._parse_conjunction() 5152 5153 while self._match(TokenType.WHEN): 5154 this = self._parse_conjunction() 5155 self._match(TokenType.THEN) 5156 then = self._parse_conjunction() 5157 ifs.append(self.expression(exp.If, this=this, true=then)) 5158 5159 if self._match(TokenType.ELSE): 5160 default = self._parse_conjunction() 5161 5162 if not self._match(TokenType.END): 5163 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5164 default = exp.column("interval") 5165 else: 5166 self.raise_error("Expected END after CASE", self._prev) 5167 5168 return self.expression( 5169 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5170 ) 5171 5172 def _parse_if(self) -> t.Optional[exp.Expression]: 5173 if self._match(TokenType.L_PAREN): 5174 args = self._parse_csv(self._parse_conjunction) 5175 this = self.validate_expression(exp.If.from_arg_list(args), args) 5176 self._match_r_paren() 5177 else: 5178 index = self._index - 1 5179 5180 if self.NO_PAREN_IF_COMMANDS and index == 0: 5181 return self._parse_as_command(self._prev) 5182 5183 condition = self._parse_conjunction() 5184 5185 if not condition: 5186 self._retreat(index) 5187 return None 5188 
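            # No-parens variant: IF <condition> THEN <true> [ELSE <false>] [END]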
5189 self._match(TokenType.THEN) 5190 true = self._parse_conjunction() 5191 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5192 self._match(TokenType.END) 5193 this = self.expression(exp.If, this=condition, true=true, false=false) 5194 5195 return this 5196 5197 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5198 if not self._match_text_seq("VALUE", "FOR"): 5199 self._retreat(self._index - 1) 5200 return None 5201 5202 return self.expression( 5203 exp.NextValueFor, 5204 this=self._parse_column(), 5205 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5206 ) 5207 5208 def _parse_extract(self) -> exp.Extract: 5209 this = self._parse_function() or self._parse_var() or self._parse_type() 5210 5211 if self._match(TokenType.FROM): 5212 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5213 5214 if not self._match(TokenType.COMMA): 5215 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5216 5217 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5218 5219 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5220 this = self._parse_conjunction() 5221 5222 if not self._match(TokenType.ALIAS): 5223 if self._match(TokenType.COMMA): 5224 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5225 5226 self.raise_error("Expected AS after CAST") 5227 5228 fmt = None 5229 to = self._parse_types() 5230 5231 if self._match(TokenType.FORMAT): 5232 fmt_string = self._parse_string() 5233 fmt = self._parse_at_time_zone(fmt_string) 5234 5235 if not to: 5236 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5237 if to.this in exp.DataType.TEMPORAL_TYPES: 5238 this = self.expression( 5239 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5240 this=this, 5241 format=exp.Literal.string( 5242 format_time( 5243 fmt_string.this if fmt_string else "", 5244 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5245 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5246 ) 5247 ), 5248 ) 5249 5250 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5251 this.set("zone", fmt.args["zone"]) 5252 return this 5253 elif not to: 5254 self.raise_error("Expected TYPE after CAST") 5255 elif isinstance(to, exp.Identifier): 5256 to = exp.DataType.build(to.name, udt=True) 5257 elif to.this == exp.DataType.Type.CHAR: 5258 if self._match(TokenType.CHARACTER_SET): 5259 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5260 5261 return self.expression( 5262 exp.Cast if strict else exp.TryCast, 5263 this=this, 5264 to=to, 5265 format=fmt, 5266 safe=safe, 5267 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5268 ) 5269 5270 def _parse_string_agg(self) -> exp.Expression: 5271 if self._match(TokenType.DISTINCT): 5272 args: t.List[t.Optional[exp.Expression]] = [ 5273 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5274 ] 5275 if self._match(TokenType.COMMA): 5276 args.extend(self._parse_csv(self._parse_conjunction)) 5277 else: 5278 args = self._parse_csv(self._parse_conjunction) # type: ignore 5279 5280 index = self._index 5281 if not self._match(TokenType.R_PAREN) and args: 5282 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5283 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5284 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5285 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5286 5287 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5288 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5289 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5290 if not self._match_text_seq("WITHIN", "GROUP"): 5291 self._retreat(index) 5292 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5293 5294 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5295 order = self._parse_order(this=seq_get(args, 0)) 5296 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5297 5298 def _parse_convert( 5299 self, strict: bool, safe: t.Optional[bool] = None 5300 ) -> t.Optional[exp.Expression]: 5301 this = self._parse_bitwise() 5302 5303 if self._match(TokenType.USING): 5304 to: t.Optional[exp.Expression] = self.expression( 5305 exp.CharacterSet, this=self._parse_var() 5306 ) 5307 elif self._match(TokenType.COMMA): 5308 to = self._parse_types() 5309 else: 5310 to = None 5311 5312 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5313 5314 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5315 """ 5316 There are generally two variants of the DECODE function: 5317 5318 - DECODE(bin, charset) 5319 - DECODE(expression, search, result [, search, result] ... [, default]) 5320 5321 The second variant will always be parsed into a CASE expression. Note that NULL 5322 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5323 instead of relying on pattern matching. 
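        For example, DECODE(x, 1, 'one', 'other') is parsed roughly as
        CASE WHEN x = 1 THEN 'one' ELSE 'other' END, and a non-literal, non-NULL
        search value additionally matches when both it and the input expression
        are NULL (see the null-safe condition built below).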
5324 """ 5325 args = self._parse_csv(self._parse_conjunction) 5326 5327 if len(args) < 3: 5328 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5329 5330 expression, *expressions = args 5331 if not expression: 5332 return None 5333 5334 ifs = [] 5335 for search, result in zip(expressions[::2], expressions[1::2]): 5336 if not search or not result: 5337 return None 5338 5339 if isinstance(search, exp.Literal): 5340 ifs.append( 5341 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5342 ) 5343 elif isinstance(search, exp.Null): 5344 ifs.append( 5345 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5346 ) 5347 else: 5348 cond = exp.or_( 5349 exp.EQ(this=expression.copy(), expression=search), 5350 exp.and_( 5351 exp.Is(this=expression.copy(), expression=exp.Null()), 5352 exp.Is(this=search.copy(), expression=exp.Null()), 5353 copy=False, 5354 ), 5355 copy=False, 5356 ) 5357 ifs.append(exp.If(this=cond, true=result)) 5358 5359 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5360 5361 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5362 self._match_text_seq("KEY") 5363 key = self._parse_column() 5364 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5365 self._match_text_seq("VALUE") 5366 value = self._parse_bitwise() 5367 5368 if not key and not value: 5369 return None 5370 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5371 5372 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5373 if not this or not self._match_text_seq("FORMAT", "JSON"): 5374 return this 5375 5376 return self.expression(exp.FormatJson, this=this) 5377 5378 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5379 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5380 for value in values: 5381 if self._match_text_seq(value, "ON", on): 5382 return f"{value} ON {on}" 5383 5384 return None 5385 5386 @t.overload 5387 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5388 5389 @t.overload 5390 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
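    # A rough usage sketch, assuming the active dialect routes JSON_OBJECT to this
    # method via FUNCTION_PARSERS and accepts the KEY/VALUE form:
    #
    #     import sqlglot
    #
    #     ast = sqlglot.parse_one("SELECT JSON_OBJECT('a' VALUE 1)")
    #     ast.find(sqlglot.exp.JSONObject)  # should be an exp.JSONObject node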
5391 5392 def _parse_json_object(self, agg=False): 5393 star = self._parse_star() 5394 expressions = ( 5395 [star] 5396 if star 5397 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5398 ) 5399 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5400 5401 unique_keys = None 5402 if self._match_text_seq("WITH", "UNIQUE"): 5403 unique_keys = True 5404 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5405 unique_keys = False 5406 5407 self._match_text_seq("KEYS") 5408 5409 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5410 self._parse_type() 5411 ) 5412 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5413 5414 return self.expression( 5415 exp.JSONObjectAgg if agg else exp.JSONObject, 5416 expressions=expressions, 5417 null_handling=null_handling, 5418 unique_keys=unique_keys, 5419 return_type=return_type, 5420 encoding=encoding, 5421 ) 5422 5423 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5424 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5425 if not self._match_text_seq("NESTED"): 5426 this = self._parse_id_var() 5427 kind = self._parse_types(allow_identifiers=False) 5428 nested = None 5429 else: 5430 this = None 5431 kind = None 5432 nested = True 5433 5434 path = self._match_text_seq("PATH") and self._parse_string() 5435 nested_schema = nested and self._parse_json_schema() 5436 5437 return self.expression( 5438 exp.JSONColumnDef, 5439 this=this, 5440 kind=kind, 5441 path=path, 5442 nested_schema=nested_schema, 5443 ) 5444 5445 def _parse_json_schema(self) -> exp.JSONSchema: 5446 self._match_text_seq("COLUMNS") 5447 return self.expression( 5448 exp.JSONSchema, 5449 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5450 ) 5451 5452 def _parse_json_table(self) -> exp.JSONTable: 5453 this = self._parse_format_json(self._parse_bitwise()) 5454 path = self._match(TokenType.COMMA) and self._parse_string() 5455 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5456 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5457 schema = self._parse_json_schema() 5458 5459 return exp.JSONTable( 5460 this=this, 5461 schema=schema, 5462 path=path, 5463 error_handling=error_handling, 5464 empty_handling=empty_handling, 5465 ) 5466 5467 def _parse_match_against(self) -> exp.MatchAgainst: 5468 expressions = self._parse_csv(self._parse_column) 5469 5470 self._match_text_seq(")", "AGAINST", "(") 5471 5472 this = self._parse_string() 5473 5474 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5475 modifier = "IN NATURAL LANGUAGE MODE" 5476 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5477 modifier = f"{modifier} WITH QUERY EXPANSION" 5478 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5479 modifier = "IN BOOLEAN MODE" 5480 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5481 modifier = "WITH QUERY EXPANSION" 5482 else: 5483 modifier = None 5484 5485 return self.expression( 5486 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5487 ) 5488 5489 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5490 def _parse_open_json(self) -> exp.OpenJSON: 5491 this = self._parse_bitwise() 5492 path = self._match(TokenType.COMMA) and self._parse_string() 5493 5494 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5495 this = self._parse_field(any_token=True) 5496 kind = self._parse_types() 5497 path = 
self._parse_string() 5498 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5499 5500 return self.expression( 5501 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5502 ) 5503 5504 expressions = None 5505 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5506 self._match_l_paren() 5507 expressions = self._parse_csv(_parse_open_json_column_def) 5508 5509 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5510 5511 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5512 args = self._parse_csv(self._parse_bitwise) 5513 5514 if self._match(TokenType.IN): 5515 return self.expression( 5516 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5517 ) 5518 5519 if haystack_first: 5520 haystack = seq_get(args, 0) 5521 needle = seq_get(args, 1) 5522 else: 5523 needle = seq_get(args, 0) 5524 haystack = seq_get(args, 1) 5525 5526 return self.expression( 5527 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5528 ) 5529 5530 def _parse_predict(self) -> exp.Predict: 5531 self._match_text_seq("MODEL") 5532 this = self._parse_table() 5533 5534 self._match(TokenType.COMMA) 5535 self._match_text_seq("TABLE") 5536 5537 return self.expression( 5538 exp.Predict, 5539 this=this, 5540 expression=self._parse_table(), 5541 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5542 ) 5543 5544 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5545 args = self._parse_csv(self._parse_table) 5546 return exp.JoinHint(this=func_name.upper(), expressions=args) 5547 5548 def _parse_substring(self) -> exp.Substring: 5549 # Postgres supports the form: substring(string [from int] [for int]) 5550 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5551 5552 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5553 5554 if self._match(TokenType.FROM): 5555 args.append(self._parse_bitwise()) 5556 if self._match(TokenType.FOR): 5557 if len(args) == 1: 5558 args.append(exp.Literal.number(1)) 5559 args.append(self._parse_bitwise()) 5560 5561 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5562 5563 def _parse_trim(self) -> exp.Trim: 5564 # https://www.w3resource.com/sql/character-functions/trim.php 5565 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5566 5567 position = None 5568 collation = None 5569 expression = None 5570 5571 if self._match_texts(self.TRIM_TYPES): 5572 position = self._prev.text.upper() 5573 5574 this = self._parse_bitwise() 5575 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5576 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5577 expression = self._parse_bitwise() 5578 5579 if invert_order: 5580 this, expression = expression, this 5581 5582 if self._match(TokenType.COLLATE): 5583 collation = self._parse_bitwise() 5584 5585 return self.expression( 5586 exp.Trim, this=this, position=position, expression=expression, collation=collation 5587 ) 5588 5589 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5590 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5591 5592 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5593 return self._parse_window(self._parse_id_var(), alias=True) 5594 5595 def _parse_respect_or_ignore_nulls( 5596 self, this: t.Optional[exp.Expression] 5597 ) -> t.Optional[exp.Expression]: 5598 if self._match_text_seq("IGNORE", "NULLS"): 
5599 return self.expression(exp.IgnoreNulls, this=this) 5600 if self._match_text_seq("RESPECT", "NULLS"): 5601 return self.expression(exp.RespectNulls, this=this) 5602 return this 5603 5604 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5605 if self._match(TokenType.HAVING): 5606 self._match_texts(("MAX", "MIN")) 5607 max = self._prev.text.upper() != "MIN" 5608 return self.expression( 5609 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5610 ) 5611 5612 return this 5613 5614 def _parse_window( 5615 self, this: t.Optional[exp.Expression], alias: bool = False 5616 ) -> t.Optional[exp.Expression]: 5617 func = this 5618 comments = func.comments if isinstance(func, exp.Expression) else None 5619 5620 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5621 self._match(TokenType.WHERE) 5622 this = self.expression( 5623 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5624 ) 5625 self._match_r_paren() 5626 5627 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5628 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5629 if self._match_text_seq("WITHIN", "GROUP"): 5630 order = self._parse_wrapped(self._parse_order) 5631 this = self.expression(exp.WithinGroup, this=this, expression=order) 5632 5633 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5634 # Some dialects choose to implement and some do not. 5635 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5636 5637 # There is some code above in _parse_lambda that handles 5638 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5639 5640 # The below changes handle 5641 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5642 5643 # Oracle allows both formats 5644 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5645 # and Snowflake chose to do the same for familiarity 5646 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5647 if isinstance(this, exp.AggFunc): 5648 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5649 5650 if ignore_respect and ignore_respect is not this: 5651 ignore_respect.replace(ignore_respect.this) 5652 this = self.expression(ignore_respect.__class__, this=this) 5653 5654 this = self._parse_respect_or_ignore_nulls(this) 5655 5656 # bigquery select from window x AS (partition by ...) 
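        # e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y);
        # _parse_named_window enters here with alias=True, so `w` is parsed as the
        # window's alias and no OVER keyword is expected before the spec.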
5657 if alias: 5658 over = None 5659 self._match(TokenType.ALIAS) 5660 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5661 return this 5662 else: 5663 over = self._prev.text.upper() 5664 5665 if comments and isinstance(func, exp.Expression): 5666 func.pop_comments() 5667 5668 if not self._match(TokenType.L_PAREN): 5669 return self.expression( 5670 exp.Window, 5671 comments=comments, 5672 this=this, 5673 alias=self._parse_id_var(False), 5674 over=over, 5675 ) 5676 5677 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5678 5679 first = self._match(TokenType.FIRST) 5680 if self._match_text_seq("LAST"): 5681 first = False 5682 5683 partition, order = self._parse_partition_and_order() 5684 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5685 5686 if kind: 5687 self._match(TokenType.BETWEEN) 5688 start = self._parse_window_spec() 5689 self._match(TokenType.AND) 5690 end = self._parse_window_spec() 5691 5692 spec = self.expression( 5693 exp.WindowSpec, 5694 kind=kind, 5695 start=start["value"], 5696 start_side=start["side"], 5697 end=end["value"], 5698 end_side=end["side"], 5699 ) 5700 else: 5701 spec = None 5702 5703 self._match_r_paren() 5704 5705 window = self.expression( 5706 exp.Window, 5707 comments=comments, 5708 this=this, 5709 partition_by=partition, 5710 order=order, 5711 spec=spec, 5712 alias=window_alias, 5713 over=over, 5714 first=first, 5715 ) 5716 5717 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5718 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5719 return self._parse_window(window, alias=alias) 5720 5721 return window 5722 5723 def _parse_partition_and_order( 5724 self, 5725 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5726 return self._parse_partition_by(), self._parse_order() 5727 5728 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5729 self._match(TokenType.BETWEEN) 5730 5731 return { 5732 "value": ( 5733 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5734 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5735 or self._parse_bitwise() 5736 ), 5737 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5738 } 5739 5740 def _parse_alias( 5741 self, this: t.Optional[exp.Expression], explicit: bool = False 5742 ) -> t.Optional[exp.Expression]: 5743 any_token = self._match(TokenType.ALIAS) 5744 comments = self._prev_comments or [] 5745 5746 if explicit and not any_token: 5747 return this 5748 5749 if self._match(TokenType.L_PAREN): 5750 aliases = self.expression( 5751 exp.Aliases, 5752 comments=comments, 5753 this=this, 5754 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5755 ) 5756 self._match_r_paren(aliases) 5757 return aliases 5758 5759 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5760 self.STRING_ALIASES and self._parse_string_as_identifier() 5761 ) 5762 5763 if alias: 5764 comments.extend(alias.pop_comments()) 5765 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5766 column = this.this 5767 5768 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5769 if not this.comments and column and column.comments: 5770 this.comments = column.pop_comments() 5771 5772 return this 5773 5774 def _parse_id_var( 5775 self, 5776 any_token: bool = True, 5777 tokens: t.Optional[t.Collection[TokenType]] = None, 5778 ) -> t.Optional[exp.Expression]: 5779 expression = self._parse_identifier() 5780 if 
not expression and ( 5781 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5782 ): 5783 quoted = self._prev.token_type == TokenType.STRING 5784 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5785 5786 return expression 5787 5788 def _parse_string(self) -> t.Optional[exp.Expression]: 5789 if self._match_set(self.STRING_PARSERS): 5790 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5791 return self._parse_placeholder() 5792 5793 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5794 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5795 5796 def _parse_number(self) -> t.Optional[exp.Expression]: 5797 if self._match_set(self.NUMERIC_PARSERS): 5798 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5799 return self._parse_placeholder() 5800 5801 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5802 if self._match(TokenType.IDENTIFIER): 5803 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5804 return self._parse_placeholder() 5805 5806 def _parse_var( 5807 self, 5808 any_token: bool = False, 5809 tokens: t.Optional[t.Collection[TokenType]] = None, 5810 upper: bool = False, 5811 ) -> t.Optional[exp.Expression]: 5812 if ( 5813 (any_token and self._advance_any()) 5814 or self._match(TokenType.VAR) 5815 or (self._match_set(tokens) if tokens else False) 5816 ): 5817 return self.expression( 5818 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5819 ) 5820 return self._parse_placeholder() 5821 5822 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5823 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5824 self._advance() 5825 return self._prev 5826 return None 5827 5828 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5829 return self._parse_var() or self._parse_string() 5830 5831 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5832 return self._parse_primary() or self._parse_var(any_token=True) 5833 5834 def _parse_null(self) -> t.Optional[exp.Expression]: 5835 if self._match_set(self.NULL_TOKENS): 5836 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5837 return self._parse_placeholder() 5838 5839 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5840 if self._match(TokenType.TRUE): 5841 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5842 if self._match(TokenType.FALSE): 5843 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5844 return self._parse_placeholder() 5845 5846 def _parse_star(self) -> t.Optional[exp.Expression]: 5847 if self._match(TokenType.STAR): 5848 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5849 return self._parse_placeholder() 5850 5851 def _parse_parameter(self) -> exp.Parameter: 5852 this = self._parse_identifier() or self._parse_primary_or_var() 5853 return self.expression(exp.Parameter, this=this) 5854 5855 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5856 if self._match_set(self.PLACEHOLDER_PARSERS): 5857 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5858 if placeholder: 5859 return placeholder 5860 self._advance(-1) 5861 return None 5862 5863 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5864 if not self._match_texts(keywords): 5865 return None 5866 if self._match(TokenType.L_PAREN, advance=False): 5867 return 
self._parse_wrapped_csv(self._parse_expression) 5868 5869 expression = self._parse_expression() 5870 return [expression] if expression else None 5871 5872 def _parse_csv( 5873 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5874 ) -> t.List[exp.Expression]: 5875 parse_result = parse_method() 5876 items = [parse_result] if parse_result is not None else [] 5877 5878 while self._match(sep): 5879 self._add_comments(parse_result) 5880 parse_result = parse_method() 5881 if parse_result is not None: 5882 items.append(parse_result) 5883 5884 return items 5885 5886 def _parse_tokens( 5887 self, parse_method: t.Callable, expressions: t.Dict 5888 ) -> t.Optional[exp.Expression]: 5889 this = parse_method() 5890 5891 while self._match_set(expressions): 5892 this = self.expression( 5893 expressions[self._prev.token_type], 5894 this=this, 5895 comments=self._prev_comments, 5896 expression=parse_method(), 5897 ) 5898 5899 return this 5900 5901 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5902 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5903 5904 def _parse_wrapped_csv( 5905 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5906 ) -> t.List[exp.Expression]: 5907 return self._parse_wrapped( 5908 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5909 ) 5910 5911 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5912 wrapped = self._match(TokenType.L_PAREN) 5913 if not wrapped and not optional: 5914 self.raise_error("Expecting (") 5915 parse_result = parse_method() 5916 if wrapped: 5917 self._match_r_paren() 5918 return parse_result 5919 5920 def _parse_expressions(self) -> t.List[exp.Expression]: 5921 return self._parse_csv(self._parse_expression) 5922 5923 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5924 return self._parse_select() or self._parse_set_operations( 5925 self._parse_expression() if alias else self._parse_conjunction() 5926 ) 5927 5928 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5929 return self._parse_query_modifiers( 5930 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5931 ) 5932 5933 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5934 this = None 5935 if self._match_texts(self.TRANSACTION_KIND): 5936 this = self._prev.text 5937 5938 self._match_texts(("TRANSACTION", "WORK")) 5939 5940 modes = [] 5941 while True: 5942 mode = [] 5943 while self._match(TokenType.VAR): 5944 mode.append(self._prev.text) 5945 5946 if mode: 5947 modes.append(" ".join(mode)) 5948 if not self._match(TokenType.COMMA): 5949 break 5950 5951 return self.expression(exp.Transaction, this=this, modes=modes) 5952 5953 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5954 chain = None 5955 savepoint = None 5956 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5957 5958 self._match_texts(("TRANSACTION", "WORK")) 5959 5960 if self._match_text_seq("TO"): 5961 self._match_text_seq("SAVEPOINT") 5962 savepoint = self._parse_id_var() 5963 5964 if self._match(TokenType.AND): 5965 chain = not self._match_text_seq("NO") 5966 self._match_text_seq("CHAIN") 5967 5968 if is_rollback: 5969 return self.expression(exp.Rollback, savepoint=savepoint) 5970 5971 return self.expression(exp.Commit, chain=chain) 5972 5973 def _parse_refresh(self) -> exp.Refresh: 5974 self._match(TokenType.TABLE) 5975 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5976 5977 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5978 if not self._match_text_seq("ADD"): 5979 return None 5980 5981 self._match(TokenType.COLUMN) 5982 exists_column = self._parse_exists(not_=True) 5983 expression = self._parse_field_def() 5984 5985 if expression: 5986 expression.set("exists", exists_column) 5987 5988 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5989 if self._match_texts(("FIRST", "AFTER")): 5990 position = self._prev.text 5991 column_position = self.expression( 5992 exp.ColumnPosition, this=self._parse_column(), position=position 5993 ) 5994 expression.set("position", column_position) 5995 5996 return expression 5997 5998 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5999 drop = self._match(TokenType.DROP) and self._parse_drop() 6000 if drop and not isinstance(drop, exp.Command): 6001 drop.set("kind", drop.args.get("kind", "COLUMN")) 6002 return drop 6003 6004 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6005 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6006 return self.expression( 6007 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6008 ) 6009 6010 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6011 index = self._index - 1 6012 6013 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6014 return self._parse_csv( 6015 lambda: self.expression( 6016 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6017 ) 6018 ) 6019 6020 self._retreat(index) 6021 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6022 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6023 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6024 6025 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6026 if self._match_texts(self.ALTER_ALTER_PARSERS): 6027 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6028 6029 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6030 # keyword after ALTER we default to parsing this statement 6031 self._match(TokenType.COLUMN) 6032 column = self._parse_field(any_token=True) 6033 6034 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6035 return self.expression(exp.AlterColumn, this=column, drop=True) 6036 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6037 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6038 if self._match(TokenType.COMMENT): 6039 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6040 6041 self._match_text_seq("SET", "DATA") 6042 self._match_text_seq("TYPE") 6043 return self.expression( 6044 exp.AlterColumn, 6045 this=column, 6046 dtype=self._parse_types(), 6047 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6048 using=self._match(TokenType.USING) and self._parse_conjunction(), 6049 ) 6050 6051 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6052 if self._match_texts(("ALL", "EVEN", "AUTO")): 6053 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6054 6055 self._match_text_seq("KEY", "DISTKEY") 6056 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6057 6058 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6059 if compound: 6060 
self._match_text_seq("SORTKEY") 6061 6062 if self._match(TokenType.L_PAREN, advance=False): 6063 return self.expression( 6064 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6065 ) 6066 6067 self._match_texts(("AUTO", "NONE")) 6068 return self.expression( 6069 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6070 ) 6071 6072 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6073 index = self._index - 1 6074 6075 partition_exists = self._parse_exists() 6076 if self._match(TokenType.PARTITION, advance=False): 6077 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6078 6079 self._retreat(index) 6080 return self._parse_csv(self._parse_drop_column) 6081 6082 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6083 if self._match(TokenType.COLUMN): 6084 exists = self._parse_exists() 6085 old_column = self._parse_column() 6086 to = self._match_text_seq("TO") 6087 new_column = self._parse_column() 6088 6089 if old_column is None or to is None or new_column is None: 6090 return None 6091 6092 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6093 6094 self._match_text_seq("TO") 6095 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6096 6097 def _parse_alter_table_set(self) -> exp.AlterSet: 6098 alter_set = self.expression(exp.AlterSet) 6099 6100 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6101 "TABLE", "PROPERTIES" 6102 ): 6103 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6104 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6105 alter_set.set("expressions", [self._parse_conjunction()]) 6106 elif self._match_texts(("LOGGED", "UNLOGGED")): 6107 alter_set.set("option", exp.var(self._prev.text.upper())) 6108 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6109 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6110 elif self._match_text_seq("LOCATION"): 6111 alter_set.set("location", self._parse_field()) 6112 elif self._match_text_seq("ACCESS", "METHOD"): 6113 alter_set.set("access_method", self._parse_field()) 6114 elif self._match_text_seq("TABLESPACE"): 6115 alter_set.set("tablespace", self._parse_field()) 6116 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6117 alter_set.set("file_format", [self._parse_field()]) 6118 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6119 alter_set.set("file_format", self._parse_wrapped_options()) 6120 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6121 alter_set.set("copy_options", self._parse_wrapped_options()) 6122 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6123 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6124 else: 6125 if self._match_text_seq("SERDE"): 6126 alter_set.set("serde", self._parse_field()) 6127 6128 alter_set.set("expressions", [self._parse_properties()]) 6129 6130 return alter_set 6131 6132 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6133 start = self._prev 6134 6135 if not self._match(TokenType.TABLE): 6136 return self._parse_as_command(start) 6137 6138 exists = self._parse_exists() 6139 only = self._match_text_seq("ONLY") 6140 this = self._parse_table(schema=True) 6141 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6142 6143 if self._next: 6144 self._advance() 6145 6146 parser = 
self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6147 if parser: 6148 actions = ensure_list(parser(self)) 6149 options = self._parse_csv(self._parse_property) 6150 6151 if not self._curr and actions: 6152 return self.expression( 6153 exp.AlterTable, 6154 this=this, 6155 exists=exists, 6156 actions=actions, 6157 only=only, 6158 options=options, 6159 cluster=cluster, 6160 ) 6161 6162 return self._parse_as_command(start) 6163 6164 def _parse_merge(self) -> exp.Merge: 6165 self._match(TokenType.INTO) 6166 target = self._parse_table() 6167 6168 if target and self._match(TokenType.ALIAS, advance=False): 6169 target.set("alias", self._parse_table_alias()) 6170 6171 self._match(TokenType.USING) 6172 using = self._parse_table() 6173 6174 self._match(TokenType.ON) 6175 on = self._parse_conjunction() 6176 6177 return self.expression( 6178 exp.Merge, 6179 this=target, 6180 using=using, 6181 on=on, 6182 expressions=self._parse_when_matched(), 6183 ) 6184 6185 def _parse_when_matched(self) -> t.List[exp.When]: 6186 whens = [] 6187 6188 while self._match(TokenType.WHEN): 6189 matched = not self._match(TokenType.NOT) 6190 self._match_text_seq("MATCHED") 6191 source = ( 6192 False 6193 if self._match_text_seq("BY", "TARGET") 6194 else self._match_text_seq("BY", "SOURCE") 6195 ) 6196 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6197 6198 self._match(TokenType.THEN) 6199 6200 if self._match(TokenType.INSERT): 6201 _this = self._parse_star() 6202 if _this: 6203 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6204 else: 6205 then = self.expression( 6206 exp.Insert, 6207 this=self._parse_value(), 6208 expression=self._match_text_seq("VALUES") and self._parse_value(), 6209 ) 6210 elif self._match(TokenType.UPDATE): 6211 expressions = self._parse_star() 6212 if expressions: 6213 then = self.expression(exp.Update, expressions=expressions) 6214 else: 6215 then = self.expression( 6216 exp.Update, 6217 expressions=self._match(TokenType.SET) 6218 and self._parse_csv(self._parse_equality), 6219 ) 6220 elif self._match(TokenType.DELETE): 6221 then = self.expression(exp.Var, this=self._prev.text) 6222 else: 6223 then = None 6224 6225 whens.append( 6226 self.expression( 6227 exp.When, 6228 matched=matched, 6229 source=source, 6230 condition=condition, 6231 then=then, 6232 ) 6233 ) 6234 return whens 6235 6236 def _parse_show(self) -> t.Optional[exp.Expression]: 6237 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6238 if parser: 6239 return parser(self) 6240 return self._parse_as_command(self._prev) 6241 6242 def _parse_set_item_assignment( 6243 self, kind: t.Optional[str] = None 6244 ) -> t.Optional[exp.Expression]: 6245 index = self._index 6246 6247 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6248 return self._parse_set_transaction(global_=kind == "GLOBAL") 6249 6250 left = self._parse_primary() or self._parse_column() 6251 assignment_delimiter = self._match_texts(("=", "TO")) 6252 6253 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6254 self._retreat(index) 6255 return None 6256 6257 right = self._parse_statement() or self._parse_id_var() 6258 if isinstance(right, (exp.Column, exp.Identifier)): 6259 right = exp.var(right.name) 6260 6261 this = self.expression(exp.EQ, this=left, expression=right) 6262 return self.expression(exp.SetItem, this=this, kind=kind) 6263 6264 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6265 
self._match_text_seq("TRANSACTION") 6266 characteristics = self._parse_csv( 6267 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6268 ) 6269 return self.expression( 6270 exp.SetItem, 6271 expressions=characteristics, 6272 kind="TRANSACTION", 6273 **{"global": global_}, # type: ignore 6274 ) 6275 6276 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6277 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6278 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6279 6280 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6281 index = self._index 6282 set_ = self.expression( 6283 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6284 ) 6285 6286 if self._curr: 6287 self._retreat(index) 6288 return self._parse_as_command(self._prev) 6289 6290 return set_ 6291 6292 def _parse_var_from_options( 6293 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6294 ) -> t.Optional[exp.Var]: 6295 start = self._curr 6296 if not start: 6297 return None 6298 6299 option = start.text.upper() 6300 continuations = options.get(option) 6301 6302 index = self._index 6303 self._advance() 6304 for keywords in continuations or []: 6305 if isinstance(keywords, str): 6306 keywords = (keywords,) 6307 6308 if self._match_text_seq(*keywords): 6309 option = f"{option} {' '.join(keywords)}" 6310 break 6311 else: 6312 if continuations or continuations is None: 6313 if raise_unmatched: 6314 self.raise_error(f"Unknown option {option}") 6315 6316 self._retreat(index) 6317 return None 6318 6319 return exp.var(option) 6320 6321 def _parse_as_command(self, start: Token) -> exp.Command: 6322 while self._curr: 6323 self._advance() 6324 text = self._find_sql(start, self._prev) 6325 size = len(start.text) 6326 self._warn_unsupported() 6327 return exp.Command(this=text[:size], expression=text[size:]) 6328 6329 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6330 settings = [] 6331 6332 self._match_l_paren() 6333 kind = self._parse_id_var() 6334 6335 if self._match(TokenType.L_PAREN): 6336 while True: 6337 key = self._parse_id_var() 6338 value = self._parse_primary() 6339 6340 if not key and value is None: 6341 break 6342 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6343 self._match(TokenType.R_PAREN) 6344 6345 self._match_r_paren() 6346 6347 return self.expression( 6348 exp.DictProperty, 6349 this=this, 6350 kind=kind.this if kind else None, 6351 settings=settings, 6352 ) 6353 6354 def _parse_dict_range(self, this: str) -> exp.DictRange: 6355 self._match_l_paren() 6356 has_min = self._match_text_seq("MIN") 6357 if has_min: 6358 min = self._parse_var() or self._parse_primary() 6359 self._match_text_seq("MAX") 6360 max = self._parse_var() or self._parse_primary() 6361 else: 6362 max = self._parse_var() or self._parse_primary() 6363 min = exp.Literal.number(0) 6364 self._match_r_paren() 6365 return self.expression(exp.DictRange, this=this, min=min, max=max) 6366 6367 def _parse_comprehension( 6368 self, this: t.Optional[exp.Expression] 6369 ) -> t.Optional[exp.Comprehension]: 6370 index = self._index 6371 expression = self._parse_column() 6372 if not self._match(TokenType.IN): 6373 self._retreat(index - 1) 6374 return None 6375 iterator = self._parse_column() 6376 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6377 return self.expression( 6378 exp.Comprehension, 6379 this=this, 6380 expression=expression, 6381 
iterator=iterator, 6382 condition=condition, 6383 ) 6384 6385 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6386 if self._match(TokenType.HEREDOC_STRING): 6387 return self.expression(exp.Heredoc, this=self._prev.text) 6388 6389 if not self._match_text_seq("$"): 6390 return None 6391 6392 tags = ["$"] 6393 tag_text = None 6394 6395 if self._is_connected(): 6396 self._advance() 6397 tags.append(self._prev.text.upper()) 6398 else: 6399 self.raise_error("No closing $ found") 6400 6401 if tags[-1] != "$": 6402 if self._is_connected() and self._match_text_seq("$"): 6403 tag_text = tags[-1] 6404 tags.append("$") 6405 else: 6406 self.raise_error("No closing $ found") 6407 6408 heredoc_start = self._curr 6409 6410 while self._curr: 6411 if self._match_text_seq(*tags, advance=False): 6412 this = self._find_sql(heredoc_start, self._prev) 6413 self._advance(len(tags)) 6414 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6415 6416 self._advance() 6417 6418 self.raise_error(f"No closing {''.join(tags)} found") 6419 return None 6420 6421 def _find_parser( 6422 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6423 ) -> t.Optional[t.Callable]: 6424 if not self._curr: 6425 return None 6426 6427 index = self._index 6428 this = [] 6429 while True: 6430 # The current token might be multiple words 6431 curr = self._curr.text.upper() 6432 key = curr.split(" ") 6433 this.append(curr) 6434 6435 self._advance() 6436 result, trie = in_trie(trie, key) 6437 if result == TrieResult.FAILED: 6438 break 6439 6440 if result == TrieResult.EXISTS: 6441 subparser = parsers[" ".join(this)] 6442 return subparser 6443 6444 self._retreat(index) 6445 return None 6446 6447 def _match(self, token_type, advance=True, expression=None): 6448 if not self._curr: 6449 return None 6450 6451 if self._curr.token_type == token_type: 6452 if advance: 6453 self._advance() 6454 self._add_comments(expression) 6455 return True 6456 6457 return None 6458 6459 def _match_set(self, types, advance=True): 6460 if not self._curr: 6461 return None 6462 6463 if self._curr.token_type in types: 6464 if advance: 6465 self._advance() 6466 return True 6467 6468 return None 6469 6470 def _match_pair(self, token_type_a, token_type_b, advance=True): 6471 if not self._curr or not self._next: 6472 return None 6473 6474 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6475 if advance: 6476 self._advance(2) 6477 return True 6478 6479 return None 6480 6481 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6482 if not self._match(TokenType.L_PAREN, expression=expression): 6483 self.raise_error("Expecting (") 6484 6485 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6486 if not self._match(TokenType.R_PAREN, expression=expression): 6487 self.raise_error("Expecting )") 6488 6489 def _match_texts(self, texts, advance=True): 6490 if self._curr and self._curr.text.upper() in texts: 6491 if advance: 6492 self._advance() 6493 return True 6494 return None 6495 6496 def _match_text_seq(self, *texts, advance=True): 6497 index = self._index 6498 for text in texts: 6499 if self._curr and self._curr.text.upper() == text: 6500 self._advance() 6501 else: 6502 self._retreat(index) 6503 return None 6504 6505 if not advance: 6506 self._retreat(index) 6507 6508 return True 6509 6510 def _replace_lambda( 6511 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6512 ) -> t.Optional[exp.Expression]: 6513 if not node: 6514 return node 6515 6516 
lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6517 6518 for column in node.find_all(exp.Column): 6519 typ = lambda_types.get(column.parts[0].name) 6520 if typ is not None: 6521 dot_or_id = column.to_dot() if column.table else column.this 6522 6523 if typ: 6524 dot_or_id = self.expression( 6525 exp.Cast, 6526 this=dot_or_id, 6527 to=typ, 6528 ) 6529 6530 parent = column.parent 6531 6532 while isinstance(parent, exp.Dot): 6533 if not isinstance(parent.parent, exp.Dot): 6534 parent.replace(dot_or_id) 6535 break 6536 parent = parent.parent 6537 else: 6538 if column is node: 6539 node = dot_or_id 6540 else: 6541 column.replace(dot_or_id) 6542 return node 6543 6544 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6545 start = self._prev 6546 6547 # Not to be confused with TRUNCATE(number, decimals) function call 6548 if self._match(TokenType.L_PAREN): 6549 self._retreat(self._index - 2) 6550 return self._parse_function() 6551 6552 # Clickhouse supports TRUNCATE DATABASE as well 6553 is_database = self._match(TokenType.DATABASE) 6554 6555 self._match(TokenType.TABLE) 6556 6557 exists = self._parse_exists(not_=False) 6558 6559 expressions = self._parse_csv( 6560 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6561 ) 6562 6563 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6564 6565 if self._match_text_seq("RESTART", "IDENTITY"): 6566 identity = "RESTART" 6567 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6568 identity = "CONTINUE" 6569 else: 6570 identity = None 6571 6572 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6573 option = self._prev.text 6574 else: 6575 option = None 6576 6577 partition = self._parse_partition() 6578 6579 # Fallback case 6580 if self._curr: 6581 return self._parse_as_command(start) 6582 6583 return self.expression( 6584 exp.TruncateTable, 6585 expressions=expressions, 6586 is_database=is_database, 6587 exists=exists, 6588 cluster=cluster, 6589 identity=identity, 6590 option=option, 6591 partition=partition, 6592 ) 6593 6594 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6595 this = self._parse_ordered(self._parse_opclass) 6596 6597 if not self._match(TokenType.WITH): 6598 return this 6599 6600 op = self._parse_var(any_token=True) 6601 6602 return self.expression(exp.WithOperator, this=this, op=op) 6603 6604 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6605 opts = [] 6606 self._match(TokenType.EQ) 6607 self._match(TokenType.L_PAREN) 6608 while self._curr and not self._match(TokenType.R_PAREN): 6609 opts.append(self._parse_conjunction()) 6610 self._match(TokenType.COMMA) 6611 return opts 6612 6613 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6614 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6615 6616 options = [] 6617 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6618 option = self._parse_unquoted_field() 6619 value = None 6620 6621 # Some options are defined as functions with the values as params 6622 if not isinstance(option, exp.Func): 6623 prev = self._prev.text.upper() 6624 # Different dialects might separate options and values by white space, "=" and "AS" 6625 self._match(TokenType.EQ) 6626 self._match(TokenType.ALIAS) 6627 6628 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6629 # Snowflake FILE_FORMAT case 6630 value = self._parse_wrapped_options() 6631 else: 6632 value = self._parse_unquoted_field() 6633 
6634 param = self.expression(exp.CopyParameter, this=option, expression=value) 6635 options.append(param) 6636 6637 if sep: 6638 self._match(sep) 6639 6640 return options 6641 6642 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6643 expr = self.expression(exp.Credentials) 6644 6645 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6646 expr.set("storage", self._parse_conjunction()) 6647 if self._match_text_seq("CREDENTIALS"): 6648 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6649 creds = ( 6650 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6651 ) 6652 expr.set("credentials", creds) 6653 if self._match_text_seq("ENCRYPTION"): 6654 expr.set("encryption", self._parse_wrapped_options()) 6655 if self._match_text_seq("IAM_ROLE"): 6656 expr.set("iam_role", self._parse_field()) 6657 if self._match_text_seq("REGION"): 6658 expr.set("region", self._parse_field()) 6659 6660 return expr 6661 6662 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6663 return self._parse_field() 6664 6665 def _parse_copy(self) -> exp.Copy | exp.Command: 6666 start = self._prev 6667 6668 self._match(TokenType.INTO) 6669 6670 this = ( 6671 self._parse_conjunction() 6672 if self._match(TokenType.L_PAREN, advance=False) 6673 else self._parse_table(schema=True) 6674 ) 6675 6676 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6677 6678 files = self._parse_csv(self._parse_file_location) 6679 credentials = self._parse_credentials() 6680 6681 self._match_text_seq("WITH") 6682 6683 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6684 6685 # Fallback case 6686 if self._curr: 6687 return self._parse_as_command(start) 6688 6689 return self.expression( 6690 exp.Copy, 6691 this=this, 6692 kind=kind, 6693 credentials=credentials, 6694 files=files, 6695 params=params, 6696 )
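The `_match_*` and `_parse_*` helpers above are the low-level primitives that dialect parsers build on. As a hedged sketch (the ADD_ONE function and MyParser class are invented for illustration; real dialects define a nested Parser on their Dialect subclass), extension normally happens by overriding class-level tables such as FUNCTIONS rather than the matching helpers themselves:

    from sqlglot import exp
    from sqlglot.helper import seq_get
    from sqlglot.parser import Parser

    class MyParser(Parser):
        # Hypothetical builder: parse ADD_ONE(x) into x + 1. Real dialects
        # register builders the same way, e.g. LOG2/LOG10 in Parser.FUNCTIONS.
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "ADD_ONE": lambda args: exp.Add(
                this=seq_get(args, 0), expression=exp.Literal.number(1)
            ),
        }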
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
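A minimal usage sketch, wiring the Tokenizer and Parser together by hand (the query string is illustrative; `parse` is documented below):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a, b FROM t"
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    trees = parser.parse(Tokenizer().tokenize(sql), sql)
    print(trees[0].sql())  # SELECT a, b FROM t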
1208 def __init__( 1209 self, 1210 error_level: t.Optional[ErrorLevel] = None, 1211 error_message_context: int = 100, 1212 max_errors: int = 3, 1213 dialect: DialectType = None, 1214 ): 1215 from sqlglot.dialects import Dialect 1216 1217 self.error_level = error_level or ErrorLevel.IMMEDIATE 1218 self.error_message_context = error_message_context 1219 self.max_errors = max_errors 1220 self.dialect = Dialect.get_or_raise(dialect) 1221 self.reset()
1233 def parse( 1234 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1235 ) -> t.List[t.Optional[exp.Expression]]: 1236 """ 1237 Parses a list of tokens and returns a list of syntax trees, one tree 1238 per parsed SQL statement. 1239 1240 Args: 1241 raw_tokens: The list of tokens. 1242 sql: The original SQL string, used to produce helpful debug messages. 1243 1244 Returns: 1245 The list of the produced syntax trees. 1246 """ 1247 return self._parse( 1248 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1249 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
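A short sketch: statements are split on semicolons, so multi-statement input produces one tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql)
    assert len(trees) == 2  # one syntax tree per statement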
1251 def parse_into( 1252 self, 1253 expression_types: exp.IntoType, 1254 raw_tokens: t.List[Token], 1255 sql: t.Optional[str] = None, 1256 ) -> t.List[t.Optional[exp.Expression]]: 1257 """ 1258 Parses a list of tokens into a given Expression type. If a collection of Expression 1259 types is given instead, this method will try to parse the token list into each one 1260 of them, stopping at the first for which the parsing succeeds. 1261 1262 Args: 1263 expression_types: The expression type(s) to try and parse the token list into. 1264 raw_tokens: The list of tokens. 1265 sql: The original SQL string, used to produce helpful debug messages. 1266 1267 Returns: 1268 The target Expression. 1269 """ 1270 errors = [] 1271 for expression_type in ensure_list(expression_types): 1272 parser = self.EXPRESSION_PARSERS.get(expression_type) 1273 if not parser: 1274 raise TypeError(f"No parser registered for {expression_type}") 1275 1276 try: 1277 return self._parse(parser, raw_tokens, sql) 1278 except ParseError as e: 1279 e.errors[0]["into_expression"] = expression_type 1280 errors.append(e) 1281 1282 raise ParseError( 1283 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1284 errors=merge_errors(errors), 1285 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method tries to parse the token list into each one in turn, stopping at the first that parses successfully.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
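For example, a fragment can be parsed straight into a node type, assuming that type has a registered entry in EXPRESSION_PARSERS (exp.From does in the base Parser); passing a collection such as (exp.From, exp.Select) tries each type in order:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "FROM tbl"
    tokens = Tokenizer().tokenize(sql)
    from_ = Parser().parse_into(exp.From, tokens, sql)[0]
    assert isinstance(from_, exp.From)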
1325 def check_errors(self) -> None: 1326 """Logs or raises any found errors, depending on the chosen error level setting.""" 1327 if self.error_level == ErrorLevel.WARN: 1328 for error in self.errors: 1329 logger.error(str(error)) 1330 elif self.error_level == ErrorLevel.RAISE and self.errors: 1331 raise ParseError( 1332 concat_messages(self.errors, self.max_errors), 1333 errors=merge_errors(self.errors), 1334 )
Logs or raises any found errors, depending on the chosen error level setting.
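parse() calls this at the end of each run, so it is usually observed indirectly. A sketch with ErrorLevel.WARN, assuming the truncated query below fails to parse:

    import logging

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig()
    sql = "SELECT * FROM"  # missing table name
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)  # errors are logged, not raised
    print(len(parser.errors))  # recorded ParseError objects stay inspectable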
1336 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1337 """ 1338 Appends an error in the list of recorded errors or raises it, depending on the chosen 1339 error level setting. 1340 """ 1341 token = token or self._curr or self._prev or Token.string("") 1342 start = token.start 1343 end = token.end + 1 1344 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1345 highlight = self.sql[start:end] 1346 end_context = self.sql[end : end + self.error_message_context] 1347 1348 error = ParseError.new( 1349 f"{message}. Line {token.line}, Col: {token.col}.\n" 1350 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1351 description=message, 1352 line=token.line, 1353 col=token.col, 1354 start_context=start_context, 1355 highlight=highlight, 1356 end_context=end_context, 1357 ) 1358 1359 if self.error_level == ErrorLevel.IMMEDIATE: 1360 raise error 1361 1362 self.errors.append(error)
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
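Each recorded ParseError carries the message plus positional context. A sketch of inspecting those fields, again assuming the query fails to parse:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM"
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        err = e.errors[0]  # description, line, col, highlight, context fields
        print(err["description"], err["line"], err["col"], err["highlight"])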
1364 def expression( 1365 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1366 ) -> E: 1367 """ 1368 Creates a new, validated Expression. 1369 1370 Args: 1371 exp_class: The expression class to instantiate. 1372 comments: An optional list of comments to attach to the expression. 1373 kwargs: The arguments to set for the expression along with their respective values. 1374 1375 Returns: 1376 The target expression. 1377 """ 1378 instance = exp_class(**kwargs) 1379 instance.add_comments(comments) if comments else self._add_comments(instance) 1380 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
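Outside the parser this amounts to plain instantiation; the helper additionally attaches pending comments and validates the node. A rough equivalent built by hand:

    from sqlglot import exp

    # What self.expression(exp.Cast, this=..., to=...) produces, minus the
    # parser's comment attachment and validation bookkeeping.
    node = exp.Cast(this=exp.column("x"), to=exp.DataType.build("text"))
    print(node.sql())  # CAST(x AS TEXT)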
1387 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1388 """ 1389 Validates an Expression, making sure that all its mandatory arguments are set. 1390 1391 Args: 1392 expression: The expression to validate. 1393 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1394 1395 Returns: 1396 The validated expression. 1397 """ 1398 if self.error_level != ErrorLevel.IGNORE: 1399 for error_message in expression.error_messages(args): 1400 self.raise_error(error_message) 1401 1402 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
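A sketch of the failure mode: a Cast missing its mandatory `to` argument is reported through raise_error, so with ErrorLevel.RAISE the message is recorded on the parser rather than thrown on the spot (IMMEDIATE would raise immediately):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    broken = exp.Cast(this=exp.column("x"))  # mandatory "to" argument not set
    parser.validate_expression(broken)
    print(parser.errors)  # records a "Required keyword: 'to' missing ..." error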